Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / XMLFileAsList.java @ 46178

History | View | Annotate | Download (13.2 KB)

1
/*
2
 * To change this license header, choose License Headers in Project Properties.
3
 * To change this template file, choose Tools | Templates
4
 * and open the template in the editor.
5
 */
6
package org.gvsig.fmap.dal.store.csv;
7

    
8
import java.io.BufferedReader;
9
import java.io.Closeable;
10
import java.io.File;
11
import java.io.IOException;
12
import java.nio.ByteBuffer;
13
import java.nio.CharBuffer;
14
import java.nio.charset.Charset;
15
import java.util.AbstractList;
16
import java.util.ArrayList;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Objects;
21
import javax.xml.parsers.SAXParser;
22
import javax.xml.parsers.SAXParserFactory;
23
import org.apache.commons.io.IOUtils;
24
import org.apache.commons.lang3.StringUtils;
25
import org.apache.commons.lang3.tuple.Pair;
26
import org.gvsig.fmap.dal.store.csv.RecordsFile.Record;
27
import org.gvsig.fmap.dal.store.csv.RecordsFile.RecordType;
28
import static org.gvsig.fmap.dal.store.csv.RecordsFile.RecordTypeBuilder.recordTypeBuilder;
29
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader;
30
import org.gvsig.tools.util.GetItemWithSize64;
31
import org.xml.sax.Attributes;
32
import org.xml.sax.InputSource;
33
import org.xml.sax.Locator;
34
import org.xml.sax.SAXException;
35
import org.xml.sax.helpers.DefaultHandler;
36

    
37
/**
38
 *
39
 * @author jjdelcerro
40
 */
41
@SuppressWarnings("UseSpecificCatch")
42
public class XMLFileAsList
43
        extends AbstractList<List<String>>
44
        implements Closeable, GetItemWithSize64<List<String>> {
45

    
46
    private static final int IDXFIELD_LINE = 0;
47
    private static final int IDXFIELD_COLUMN = 1;
48
    private static final int IDXFIELD_LINEPOS = 2;
49
    private static final int IDXFIELD_RECORDPOS = 3;
50

    
51
    private final RandomAccessFileReader reader;
52
    private final String recordPath;
53
    private RecordsFile index;
54
    private List<String> fieldPaths;
55

    
56
    
57
    //                                                         ruta   recordnum                       ruta   recordnum
58
 // public XMLFileAsList(File text, Charset charset, List<Pair<String,Integer>> recordPath, List<Pair<String,Integer>> fieldPaths) throws IOException {
59
    public XMLFileAsList(File text, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
60
        this.reader = new RandomAccessFileReader(text, charset);
61
        this.index = null;
62
        this.recordPath = recordPath;
63
    }
64

    
65
    public XMLFileAsList(File text, File index, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
66
        this.reader = new RandomAccessFileReader(text, charset);
67
        if (index.exists()) {
68
            // TODO: Force to create index if text newer than index
69
            this.index = new RecordsFileImpl(index);
70
        } else {
71
            this.createIndex(index);
72
        }
73
        this.recordPath = recordPath;
74
    }
75

    
76
    public XMLFileAsList(RandomAccessFileReader reader, RecordsFile index, String recordPath, List<String> fieldPaths) throws IOException {
77
        this.reader = reader;
78
        this.index = index;
79
        this.recordPath = recordPath;
80
    }
81

    
82
    @Override
83
    public void close() {
84
        IOUtils.closeQuietly(this.reader);
85
        IOUtils.closeQuietly(this.index);
86
    }
87

    
88
    private List<String> getRecord(long position, List<String> fieldPaths) {
89
        class StopParserSAXException extends SAXException {
90
        }
91

    
92
        class ParseRecordsHandler extends DefaultHandler {
93

    
94
            Locator locator;
95
            List<String> path = new ArrayList<>();
96
            Map<String, String> record = new HashMap<>();
97
            StringBuilder value = new StringBuilder();
98
            List<String> values = new ArrayList<>();
99

    
100
            @Override
101
            public void setDocumentLocator(Locator locator) {
102
                this.locator = locator;
103
            }
104

    
105
            @Override
106
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
107
                path.add(localName);
108
                String path_s = StringUtils.join(path, "/");
109
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
110
                    return;
111
                }
112
                this.value.setLength(0);
113
//                    this.value.trimToSize();
114
            }
115

    
116
            @Override
117
            public void characters(char[] ch, int start, int length) throws SAXException {
118
                value.append(new String(ch, start, length));
119
            }
120

    
121
            @Override
122
            public void endElement(String uri, String localName, String qName) throws SAXException {
123
                String path_s = StringUtils.join(path, "/");
124

    
125
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
126
                    for (String fieldPath : fieldPaths) {
127
                        values.add(record.get(fieldPath));
128
                    }
129
                    throw new StopParserSAXException();
130
                } else {
131
                    for (String fieldPath : fieldPaths) {
132
                        if (StringUtils.equalsIgnoreCase(path_s, fieldPath)) {
133
                            record.put(fieldPath, this.value.toString());
134
                            // No break to handle repeated columns
135
                        }
136
                    }
137
                }
138

    
139
                path.remove(path.size() - 1);
140
            }
141
        }
142

    
143
        ParseRecordsHandler handler = null;
144
        try {
145
            Record record = this.index.get64(position);
146
            long recordPosition = record.getLong(IDXFIELD_RECORDPOS);
147
            this.reader.seek(recordPosition);
148
            BufferedReader breader = new BufferedReader(this.reader, 1024 * 8);
149

    
150
            InputSource is = new InputSource(breader);
151
            SAXParserFactory spf = SAXParserFactory.newInstance();
152
            spf.setNamespaceAware(true);
153
            SAXParser saxParser = spf.newSAXParser();
154
            handler = new ParseRecordsHandler();
155

    
156
            saxParser.parse(is, handler);
157
        } catch (StopParserSAXException ex) {
158
            if (handler != null) {
159
                return handler.values;
160
            }
161
        } catch (Exception ex) {
162
            throw new RuntimeException("Can't parse record " + position, ex);
163
        }
164
        return null; // ?????
165
    }
166

    
167
    @Override
168
    public List<String> get(int index) {
169
        return this.getRecord(index, fieldPaths);
170
    }
171

    
172
    @Override
173
    public List<String> get64(long position) {
174
        return this.getRecord(position, fieldPaths);
175
    }
176

    
177
    @Override
178
    public int size() {
179
        return this.index.size();
180
    }
181

    
182
    @Override
183
    public long size64() {
184
        return this.index.size64();
185
    }
186

    
187
    final public void createIndex(File indexFile) throws IOException {
188
        try {
189
            // 1. Creamos el indice vacio
190
            RecordType recordType = recordTypeBuilder()
191
                    .addLong() // IDXFIELD_LINE
192
                    .addLong() // IDXFIELD_COLUMN
193
                    .addLong() // IDXFIELD_LINEPOS
194
                    .addLong() // IDXFIELD_RECORDPOS
195
                    .build();
196
            Record record = recordType.createRecord();
197
            final RecordsFile theIndex = new RecordsFileImpl();
198
            theIndex.create(indexFile, recordType, 0);
199

    
200
            // 2. Rellenamos los campos numero de linea y columna de cada registro 
201
            //    en el indice.
202
            // Para ello nos recorremos el XML y vamos escribiendo en el indice.
203
            this.reader.rewind();
204
            InputSource is = new InputSource(reader);
205
            SAXParserFactory spf = SAXParserFactory.newInstance();
206
            spf.setNamespaceAware(true);
207
            SAXParser saxParser = spf.newSAXParser();
208

    
209
            List<String> path = new ArrayList<>();
210
            // TODO: Aqui habria que quedarse con la primera aparicion de
211
            // los campos solicitados que no pertenezcan al registro principal.
212
            // Probablemente habria que guardar esos valores en disco junto al
213
            // fichero de indice.
214
            saxParser.parse(is, new DefaultHandler() {
215
                Locator locator;
216

    
217
                @Override
218
                public void setDocumentLocator(Locator locator) {
219
                    this.locator = locator;
220
                }
221

    
222
                @Override
223
                public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
224
                    path.add(localName);
225
                    String path_s = StringUtils.join(path, "/");
226

    
227
                    if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
228
                        record.setLong(IDXFIELD_LINE, locator.getLineNumber() - 1);
229
                        record.setLong(IDXFIELD_COLUMN, locator.getColumnNumber() - 1);
230
                        record.setLong(IDXFIELD_LINEPOS, 0);
231
                        record.setLong(IDXFIELD_RECORDPOS, 0);
232
                        theIndex.add(record);
233
                    }
234
                }
235

    
236
                @Override
237
                public void endElement(String uri, String localName, String qName) throws SAXException {
238
                    path.remove(path.size() - 1);
239
                }
240
            });
241

    
242
            // 3. Nos recorremos el indice y calculamos la posicion de la linea
243
            //    para cada registro.
244
            this.reader.rewind();
245
            PositionCalculator positionCalculator = new PositionCalculator(reader);
246

    
247
            for (int i = 0; i < theIndex.size(); i++) {
248
                Record r = theIndex.get(i);
249
                positionCalculator.next(r.getLong(IDXFIELD_LINE), r.getLong(IDXFIELD_COLUMN));
250
                r.setLong(IDXFIELD_LINEPOS, positionCalculator.getLinePosition());
251
                r.setLong(IDXFIELD_RECORDPOS, positionCalculator.getColumnPosition());
252
                theIndex.set(i, record);
253
            }
254
            this.index = new RecordsFileImpl(indexFile);
255
        } catch (Exception ex) {
256
            throw new IOException("Can't create index " + Objects.toString(indexFile), ex);
257
        }
258
    }
259

    
260
    private static class CharacterSize {
261

    
262
        private final Charset charset;
263
        private final CharBuffer charBuffer;
264

    
265
        public CharacterSize(Charset charset) {
266
            this.charset = charset;
267
            this.charBuffer = CharBuffer.allocate(1);
268
        }
269

    
270
        public int size(char ch) {
271
            this.charBuffer.put(0, ch);
272
            this.charBuffer.position(0);
273
            ByteBuffer buffer = this.charset.encode(this.charBuffer);
274
            return buffer.limit();
275
        }
276

    
277
        public int size(char[] cbuf, int off, int len) {
278
            CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
279
            ByteBuffer byteBuffer = this.charset.encode(cb);
280
            return byteBuffer.limit();
281
        }
282

    
283
        public int size(char[] cbuf) {
284
            CharBuffer cb = CharBuffer.wrap(cbuf, 0, cbuf.length);
285
            ByteBuffer byteBuffer = this.charset.encode(cb);
286
            return byteBuffer.limit();
287
        }
288

    
289
        public int size(String s) {
290
            CharBuffer cb = CharBuffer.wrap(s);
291
            ByteBuffer byteBuffer = this.charset.encode(cb);
292
            return byteBuffer.limit();
293
        }
294
    }
295

    
296
    private static class PositionCalculator {
297

    
298
        private final RandomAccessFileReader reader;
299
        private final BufferedReader breader;
300
        private long currentPosition;
301
        private long currentColumn;
302
        private long currentLine;
303
        private long currentColumnPosition;
304
        private long currentLinePosition;
305

    
306
        private final char[] ch;
307
        private final CharacterSize characterSize;
308

    
309
        public PositionCalculator(RandomAccessFileReader reader) {
310
            this.reader = reader;
311
            this.breader = new BufferedReader(this.reader, 1024 * 8);
312
            this.currentPosition = this.reader.getCurrentPosition();
313
            this.currentLine = 0;
314
            this.currentColumn = 0;
315
            this.ch = new char[1];
316
            this.characterSize = new CharacterSize(this.reader.getCharset());
317
        }
318

    
319
        public boolean next(long line, long column) throws IOException {
320
            while (this.currentLine < line) {
321
                if (breader.read(this.ch, 0, 1) < 1) {
322
                    return false;
323
                }
324
                char c = ch[0];
325
                this.currentPosition += characterSize.size(c);
326
                if (c == '\n') {
327
                    this.currentLine++;
328
                    this.currentColumn = 0;
329
                }
330
            }
331
            this.currentLinePosition = this.currentPosition;
332
            while (this.currentColumn < column) {
333
                if (breader.read(this.ch, 0, 1) < 1) {
334
                    return false;
335
                }
336
                char c = ch[0];
337
                this.currentPosition += characterSize.size(c);
338
                if (c == '\n') {
339
                    // Uff, esto seria un error, ya que la linea corriente no
340
                    // tiene tantas columnas.
341
                    throw new IOException("Illegal column number " + column + " for line " + line);
342
                }
343
                this.currentColumn++;
344
            }
345
            this.currentColumnPosition = this.currentPosition;
346
            return true;
347
        }
348

    
349
        public long getLinePosition() {
350
            return this.currentLinePosition;
351
        }
352

    
353
        public long getColumnPosition() {
354
            return this.currentColumnPosition;
355
        }
356

    
357
    }
358

    
359
}