Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / XMLFileAsList.java @ 47617

History | View | Annotate | Download (14.2 KB)

1
/*
2
 * To change this license header, choose License Headers in Project Properties.
3
 * To change this template file, choose Tools | Templates
4
 * and open the template in the editor.
5
 */
6
package org.gvsig.fmap.dal.store.csv;
7

    
8
import java.io.BufferedReader;
9
import java.io.Closeable;
10
import java.io.File;
11
import java.io.IOException;
12
import java.nio.ByteBuffer;
13
import java.nio.CharBuffer;
14
import java.nio.charset.Charset;
15
import java.nio.charset.StandardCharsets;
16
import java.util.AbstractList;
17
import java.util.ArrayList;
18
import java.util.HashMap;
19
import java.util.List;
20
import java.util.Map;
21
import java.util.Objects;
22
import javax.xml.parsers.SAXParser;
23
import javax.xml.parsers.SAXParserFactory;
24
import org.apache.commons.io.FilenameUtils;
25
import org.apache.commons.io.IOUtils;
26
import org.apache.commons.lang3.StringUtils;
27
import org.apache.commons.lang3.tuple.Pair;
28
import org.gvsig.fmap.dal.store.csv.RecordsFile.Record;
29
import org.gvsig.fmap.dal.store.csv.RecordsFile.RecordType;
30
import static org.gvsig.fmap.dal.store.csv.RecordsFile.RecordTypeBuilder.recordTypeBuilder;
31
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader;
32
import org.gvsig.tools.util.GetItemWithSize64;
33
import org.gvsig.tools.util.ListBuilder;
34
import org.xml.sax.Attributes;
35
import org.xml.sax.InputSource;
36
import org.xml.sax.Locator;
37
import org.xml.sax.SAXException;
38
import org.xml.sax.helpers.DefaultHandler;
39

    
40
/**
41
 *
42
 * @author jjdelcerro
43
 */
44
@SuppressWarnings("UseSpecificCatch")
45
public class XMLFileAsList
46
        extends AbstractList<List<String>>
47
        implements Closeable, GetItemWithSize64<List<String>> {
48

    
49
    private static final int IDXFIELD_LINE = 0;
50
    private static final int IDXFIELD_COLUMN = 1;
51
    private static final int IDXFIELD_LINEPOS = 2;
52
    private static final int IDXFIELD_RECORDPOS = 3;
53

    
54
    private final RandomAccessFileReader reader;
55
    private final String recordPath;
56
    private RecordsFile index;
57
    private List<String> fieldPaths;
58

    
59
    
60
    //                                                         ruta   recordnum                       ruta   recordnum
61
 // public XMLFileAsList(File text, Charset charset, List<Pair<String,Integer>> recordPath, List<Pair<String,Integer>> fieldPaths) throws IOException {
62
    /**
     * Creates a list over the records of an XML file without attaching an
     * index.
     * <p>
     * The index is left as {@code null}; {@link #size()}, {@link #get(int)}
     * and their 64-bit variants dereference it, so {@link #createIndex(File)}
     * has to be called before any record is accessed.
     *
     * @param text the XML file to read
     * @param charset the charset used to decode the file
     * @param recordPath slash-separated element path that identifies a record
     * (compared ignoring case)
     * @param fieldPaths slash-separated element paths of the values returned
     * for each record
     * @throws IOException declared for the underlying reader
     */
    public XMLFileAsList(File text, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = new RandomAccessFileReader(text, charset);
        this.index = null;
        this.recordPath = recordPath;
        // Keep the requested fields: get()/get64() iterate over them, and
        // leaving the attribute unset makes every record access fail.
        this.fieldPaths = fieldPaths;
    }
67

    
68
    /**
     * Creates a list over the records of an XML file, backed by an index
     * file.
     * <p>
     * When the index file exists it is opened as is; otherwise it is built by
     * {@link #createIndex(File)}.
     *
     * @param text the XML file to read
     * @param index the index file to open or create
     * @param charset the charset used to decode the XML file
     * @param recordPath slash-separated element path that identifies a record
     * (compared ignoring case)
     * @param fieldPaths slash-separated element paths of the values returned
     * for each record
     * @throws IOException if the index can not be created
     */
    public XMLFileAsList(File text, File index, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = new RandomAccessFileReader(text, charset);
        // These must be assigned before createIndex() runs: the indexing
        // handler matches element paths against recordPath, and with the
        // attribute still null no element would ever be indexed.
        this.recordPath = recordPath;
        this.fieldPaths = fieldPaths;
        try {
            if (index.exists()) {
                // TODO: Force to create index if text newer than index
                this.index = new RecordsFileImpl(index);
            } else {
                this.createIndex(index);
            }
        } catch (IOException | RuntimeException ex) {
            // The instance never reaches the caller, so nobody else can
            // release the reader opened above.
            IOUtils.closeQuietly(this.reader);
            throw ex;
        }
    }
78

    
79
    /**
     * Creates a list over an already opened reader and index.
     * <p>
     * Both objects are closed by {@link #close()}.
     *
     * @param reader reader positioned over the XML file
     * @param index index holding one entry per record
     * @param recordPath slash-separated element path that identifies a record
     * (compared ignoring case)
     * @param fieldPaths slash-separated element paths of the values returned
     * for each record
     * @throws IOException declared for symmetry with the other constructors
     */
    public XMLFileAsList(RandomAccessFileReader reader, RecordsFile index, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = reader;
        this.index = index;
        this.recordPath = recordPath;
        // Keep the requested fields: get()/get64() iterate over them, and
        // leaving the attribute unset makes every record access fail.
        this.fieldPaths = fieldPaths;
    }
84

    
85
    @Override
86
    public void close() {
87
        IOUtils.closeQuietly(this.reader);
88
        IOUtils.closeQuietly(this.index);
89
    }
90

    
91
    private List<String> getRecord(long position, List<String> fieldPaths) {
92
        class StopParserSAXException extends SAXException {
93
        }
94

    
95
        class ParseRecordsHandler extends DefaultHandler {
96

    
97
            Locator locator;
98
            List<String> path = new ArrayList<>();
99
            Map<String, String> record = new HashMap<>();
100
            StringBuilder value = new StringBuilder();
101
            List<String> values = new ArrayList<>();
102

    
103
            @Override
104
            public void setDocumentLocator(Locator locator) {
105
                this.locator = locator;
106
            }
107

    
108
            @Override
109
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
110
                path.add(localName);
111
                String path_s = StringUtils.join(path, "/");
112
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
113
                    return;
114
                }
115
                this.value.setLength(0);
116
//                    this.value.trimToSize();
117
            }
118

    
119
            @Override
120
            public void characters(char[] ch, int start, int length) throws SAXException {
121
                value.append(new String(ch, start, length));
122
            }
123

    
124
            @Override
125
            public void endElement(String uri, String localName, String qName) throws SAXException {
126
                String path_s = StringUtils.join(path, "/");
127

    
128
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
129
                    for (String fieldPath : fieldPaths) {
130
                        values.add(record.get(fieldPath));
131
                    }
132
                    throw new StopParserSAXException();
133
                } else {
134
                    for (String fieldPath : fieldPaths) {
135
                        if (StringUtils.equalsIgnoreCase(path_s, fieldPath)) {
136
                            record.put(fieldPath, this.value.toString());
137
                            // No break to handle repeated columns
138
                        }
139
                    }
140
                }
141

    
142
                path.remove(path.size() - 1);
143
            }
144
        }
145

    
146
        ParseRecordsHandler handler = null;
147
        try {
148
            Record record = this.index.get64(position);
149
            long recordPosition = record.getLong(IDXFIELD_RECORDPOS);
150
            this.reader.seek(recordPosition);
151
            BufferedReader breader = new BufferedReader(this.reader, 1024 * 8);
152

    
153
            InputSource is = new InputSource(breader);
154
            SAXParserFactory spf = SAXParserFactory.newInstance();
155
            spf.setNamespaceAware(true);
156
            SAXParser saxParser = spf.newSAXParser();
157
            handler = new ParseRecordsHandler();
158

    
159
            saxParser.parse(is, handler);
160
        } catch (StopParserSAXException ex) {
161
            if (handler != null) {
162
                return handler.values;
163
            }
164
        } catch (Exception ex) {
165
            throw new RuntimeException("Can't parse record " + position, ex);
166
        }
167
        return null; // ?????
168
    }
169

    
170
    @Override
171
    public List<String> get(int index) {
172
        return this.getRecord(index, fieldPaths);
173
    }
174

    
175
    @Override
176
    public List<String> get64(long position) {
177
        return this.getRecord(position, fieldPaths);
178
    }
179

    
180
    @Override
181
    public int size() {
182
        return this.index.size();
183
    }
184

    
185
    @Override
186
    public long size64() {
187
        return this.index.size64();
188
    }
189

    
190
    final public void createIndex(File indexFile) throws IOException {
191
        try {
192
            // 1. Creamos el indice vacio
193
            RecordType recordType = recordTypeBuilder()
194
                    .addLong() // IDXFIELD_LINE
195
                    .addLong() // IDXFIELD_COLUMN
196
                    .addLong() // IDXFIELD_LINEPOS
197
                    .addLong() // IDXFIELD_RECORDPOS
198
                    .build();
199
            Record record = recordType.createRecord();
200
            final RecordsFile theIndex = new RecordsFileImpl();
201
            theIndex.create(indexFile, recordType, 0);
202

    
203
            // 2. Rellenamos los campos numero de linea y columna de cada registro 
204
            //    en el indice.
205
            // Para ello nos recorremos el XML y vamos escribiendo en el indice.
206
            this.reader.rewind();
207
            InputSource is = new InputSource(reader);
208
            SAXParserFactory spf = SAXParserFactory.newInstance();
209
            spf.setNamespaceAware(true);
210
            SAXParser saxParser = spf.newSAXParser();
211

    
212
            List<String> path = new ArrayList<>();
213
            // TODO: Aqui habria que quedarse con la primera aparicion de
214
            // los campos solicitados que no pertenezcan al registro principal.
215
            // Probablemente habria que guardar esos valores en disco junto al
216
            // fichero de indice.
217
            saxParser.parse(is, new DefaultHandler() {
218
                Locator locator;
219

    
220
                @Override
221
                public void setDocumentLocator(Locator locator) {
222
                    this.locator = locator;
223
                }
224

    
225
                @Override
226
                public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
227
                    path.add(localName);
228
                    String path_s = StringUtils.join(path, "/");
229

    
230
                    if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
231
                        record.setLong(IDXFIELD_LINE, locator.getLineNumber() - 1);
232
                        record.setLong(IDXFIELD_COLUMN, locator.getColumnNumber() - 1);
233
                        record.setLong(IDXFIELD_LINEPOS, 0);
234
                        record.setLong(IDXFIELD_RECORDPOS, 0);
235
                        theIndex.add(record);
236
                    }
237
                }
238

    
239
                @Override
240
                public void endElement(String uri, String localName, String qName) throws SAXException {
241
                    path.remove(path.size() - 1);
242
                }
243
            });
244

    
245
            // 3. Nos recorremos el indice y calculamos la posicion de la linea
246
            //    para cada registro.
247
            this.reader.rewind();
248
            PositionCalculator positionCalculator = new PositionCalculator(reader);
249

    
250
            for (int i = 0; i < theIndex.size(); i++) {
251
                Record r = theIndex.get(i);
252
                positionCalculator.next(r.getLong(IDXFIELD_LINE), r.getLong(IDXFIELD_COLUMN));
253
                r.setLong(IDXFIELD_LINEPOS, positionCalculator.getLinePosition());
254
                r.setLong(IDXFIELD_RECORDPOS, positionCalculator.getColumnPosition());
255
                theIndex.set(i, record);
256
            }
257
            this.index = new RecordsFileImpl(indexFile);
258
        } catch (Exception ex) {
259
            throw new IOException("Can't create index " + Objects.toString(indexFile), ex);
260
        }
261
    }
262

    
263
    private static class CharacterSize {
264

    
265
        private final Charset charset;
266
        private final CharBuffer charBuffer;
267

    
268
        public CharacterSize(Charset charset) {
269
            this.charset = charset;
270
            this.charBuffer = CharBuffer.allocate(1);
271
        }
272

    
273
        public int size(char ch) {
274
            this.charBuffer.put(0, ch);
275
            this.charBuffer.position(0);
276
            ByteBuffer buffer = this.charset.encode(this.charBuffer);
277
            return buffer.limit();
278
        }
279

    
280
        public int size(char[] cbuf, int off, int len) {
281
            CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
282
            ByteBuffer byteBuffer = this.charset.encode(cb);
283
            return byteBuffer.limit();
284
        }
285

    
286
        public int size(char[] cbuf) {
287
            CharBuffer cb = CharBuffer.wrap(cbuf, 0, cbuf.length);
288
            ByteBuffer byteBuffer = this.charset.encode(cb);
289
            return byteBuffer.limit();
290
        }
291

    
292
        public int size(String s) {
293
            CharBuffer cb = CharBuffer.wrap(s);
294
            ByteBuffer byteBuffer = this.charset.encode(cb);
295
            return byteBuffer.limit();
296
        }
297
    }
298

    
299
    private static class PositionCalculator {
300

    
301
        private final RandomAccessFileReader reader;
302
        private final BufferedReader breader;
303
        private long currentPosition;
304
        private long currentColumn;
305
        private long currentLine;
306
        private long currentColumnPosition;
307
        private long currentLinePosition;
308

    
309
        private final char[] ch;
310
        private final CharacterSize characterSize;
311

    
312
        public PositionCalculator(RandomAccessFileReader reader) {
313
            this.reader = reader;
314
            this.breader = new BufferedReader(this.reader, 1024 * 8);
315
            this.currentPosition = this.reader.getCurrentPosition();
316
            this.currentLine = 0;
317
            this.currentColumn = 0;
318
            this.ch = new char[1];
319
            this.characterSize = new CharacterSize(this.reader.getCharset());
320
        }
321

    
322
        public boolean next(long line, long column) throws IOException {
323
            while (this.currentLine < line) {
324
                if (breader.read(this.ch, 0, 1) < 1) {
325
                    return false;
326
                }
327
                char c = ch[0];
328
                this.currentPosition += characterSize.size(c);
329
                if (c == '\n') {
330
                    this.currentLine++;
331
                    this.currentColumn = 0;
332
                }
333
            }
334
            this.currentLinePosition = this.currentPosition;
335
            while (this.currentColumn < column) {
336
                if (breader.read(this.ch, 0, 1) < 1) {
337
                    return false;
338
                }
339
                char c = ch[0];
340
                this.currentPosition += characterSize.size(c);
341
                if (c == '\n') {
342
                    // Uff, esto seria un error, ya que la linea corriente no
343
                    // tiene tantas columnas.
344
                    throw new IOException("Illegal column number " + column + " for line " + line);
345
                }
346
                this.currentColumn++;
347
            }
348
            this.currentColumnPosition = this.currentPosition;
349
            return true;
350
        }
351

    
352
        public long getLinePosition() {
353
            return this.currentLinePosition;
354
        }
355

    
356
        public long getColumnPosition() {
357
            return this.currentColumnPosition;
358
        }
359

    
360
    }
361

    
362
    public static void main(String[] args) throws Exception {
363
        final String XMLFILE1 = "/home/jjdelcerro/datos/geodata/vector/gml/navarra.gml";
364
        final String XMLFILE2 = "/home/jjdelcerro/datos/geodata/vector/gml/Municipis/Municipis.gml";
365

    
366
        String gmlfile = XMLFILE1;
367
        XMLFileAsList gml = new XMLFileAsList(
368
                new File(gmlfile), 
369
                new File(FilenameUtils.removeExtension(gmlfile)+".gmlidx"), 
370
                StandardCharsets.UTF_8, 
371
                "FeatureCollection/featureMember", 
372
                ListBuilder.create(
373
                        "FEATURE/fid",
374
                        "FEATURE/MERINDAD",
375
                        "FEATURE/Cnt_MERIND",
376
                        "FEATURE/Nombre"
377
                )
378
        );
379
        System.out.println("File: "+gmlfile);
380
        for (int i = 0; i < gml.size(); i++) {
381
            List<String> item = gml.get(i);
382
            System.out.println(item);
383
        }
384
    }
385
}