Revision 45953

View differences:

trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/TestXML.java
1
package org.gvsig.fmap.dal.store.csv;
2

  
3
import java.io.File;
4
import java.io.FileInputStream;
5
import java.io.InputStreamReader;
6
import java.io.Reader;
7
import java.nio.ByteBuffer;
8
import java.nio.CharBuffer;
9
import java.nio.charset.Charset;
10
import java.nio.charset.StandardCharsets;
11
import java.sql.Timestamp;
12
import java.util.ArrayList;
13
import java.util.HashMap;
14
import java.util.LinkedHashSet;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Set;
18
import javax.xml.parsers.SAXParser;
19
import javax.xml.parsers.SAXParserFactory;
20
import org.apache.commons.lang3.StringUtils;
21
import org.apache.commons.lang3.mutable.MutableLong;
22
import org.apache.tika.config.TikaConfig;
23
import org.apache.tika.detect.DefaultEncodingDetector;
24
import org.apache.tika.detect.EncodingDetector;
25
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileIndex;
26
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader;
27
import org.gvsig.tools.ToolsLocator;
28
import org.gvsig.tools.i18n.I18nManager;
29
import org.gvsig.tools.util.ListBuilder;
30
import org.xml.sax.Attributes;
31
import org.xml.sax.InputSource;
32
import org.xml.sax.Locator;
33
import org.xml.sax.SAXException;
34
import org.xml.sax.helpers.DefaultHandler;
35

  
36
public class TestXML {
37
    
38
    private static final String XMLFILE1 = "/home/jjdelcerro/datos/geodata/vector/sigpac/2018/Declaracion.xml";
39
    private static final String XMLFILE2 = "/home/jjdelcerro/datos/geodata/vector/ARENA2/quincenas-0/TV_03_2019_01_Q1/victimas.xml";
40
    private static final String XMLFILE_BIG = "/home/jjdelcerro/datos/geodata/vector/RSUPAC/2020/BDA_RSU_PAC20_1713052020_001.XML";
41
            
42
    public static void main(String[] args) throws Exception {
43
        TestXML t = new TestXML();
44
        
45
        Set<String> tags = t.extractTags(XMLFILE1);
46
        System.out.println("Tags: -------------------------");
47
        for (String tag : tags) {
48
            System.out.println(tag);
49
        }
50
        List<List<String>> records = t.getRecords1(
51
                XMLFILE1, 
52
                "DECLARACION/LINEA_DECLARACION", 
53
                ListBuilder.create(
54
                    "DECLARACION/LINEA_DECLARACION/PROV",
55
//                    "DECLARACION/LINEA_DECLARACION/MUN_INE",
56
//                    "DECLARACION/LINEA_DECLARACION/MUN_CAT",
57
//                    "DECLARACION/LINEA_DECLARACION/AGREGADO",
58
                    "DECLARACION/LINEA_DECLARACION/ZONA",
59
                    "DECLARACION/LINEA_DECLARACION/POLIGONO",
60
                    "DECLARACION/LINEA_DECLARACION/PARCELA",
61
                    "DECLARACION/LINEA_DECLARACION/RECINTO",
62
                    "DECLARACION/LINEA_DECLARACION/PARCELA_AGRICOLA",
63
                    "DECLARACION/LINEA_DECLARACION/CULTIVO",
64
//                    "DECLARACION/LINEA_DECLARACION/WKT",
65
                    "DECLARACION/LINEA_DECLARACION/DN_SURFACE",
66
                    "DECLARACION/LINEA_DECLARACION/SUPERFICIE_DECLARADA"
67
//                    "DECLARACION/LINEA_DECLARACION/FC_ALMENDROS",
68
//                    "DECLARACION/LINEA_DECLARACION/FC_ALGARROBOS",
69
//                    "DECLARACION/LINEA_DECLARACION/FC_AVELLANOS",
70
//                    "DECLARACION/LINEA_DECLARACION/FC_NOGALES",
71
//                    "DECLARACION/LINEA_DECLARACION/FC_PISTACHOS",
72
//                    "DECLARACION/LINEA_DECLARACION/FC_TOTAL"
73
                )
74
        );
75
        for (List<String> record : records) {
76
            System.out.println(StringUtils.join(record, ","));
77
        }
78
    }
79
    
80
    private Reader openFileReader(File xmlfile) throws Exception  {
81
        FileInputStream fis = new FileInputStream(xmlfile);
82
//        EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector();
83
//        Charset encoding = encodingDetector.detect(fis, null);
84
        Charset encoding = StandardCharsets.UTF_8;
85
        InputStreamReader reader = new InputStreamReader(fis, encoding);
86
        return reader;
87
    }
88
    
89
    private Set<String> extractTags(String xmlfile) throws Exception {
90
        SAXParserFactory spf = SAXParserFactory.newInstance();
91
        spf.setNamespaceAware(true);
92
        SAXParser saxParser = spf.newSAXParser();
93
//        RandomAccessFileReader reader = new RandomAccessFileReader(new File(xmlfile), StandardCharsets.UTF_8);
94
        Reader reader = openFileReader(new File(xmlfile));
95
        InputSource is = new InputSource(reader);
96

  
97
        List<String> path = new ArrayList<>();
98
        Set<String> tags = new LinkedHashSet<>();
99

  
100
        System.out.println("Parse (extract-tags): -------------------------");
101
        saxParser.parse(is, new DefaultHandler() {
102
            private Locator locator;
103
            
104
            @Override
105
            public void setDocumentLocator(Locator locator) {
106
                this.locator = locator;
107
            }
108
            
109
            @Override
110
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
111
                    path.add(localName);
112

  
113
                    int line = this.locator.getLineNumber();
114
                    int column = this.locator.getColumnNumber()-2-localName.length();
115
                    String path_s = StringUtils.join(path, "/");
116

  
117
                    tags.add(path_s);
118
                    for (int i = 0; i < attributes.getLength(); i++) {
119
                        String name = attributes.getLocalName(i);
120
                        tags.add(path_s+"/#"+name);
121
                    }
122
                    if( "DECLARACION/LINEA_DECLARACION".equals(StringUtils.join(path, "/")) ) {
123
                        System.out.println(path_s+": "+line+":"+column);
124
                    }
125
                    if( "DECLARACION".equals(StringUtils.join(path, "/")) ) {
126
                        System.out.println(path_s+": "+line+":"+column);
127
                    }
128
            }
129

  
130
            @Override
131
            public void endElement(String uri, String localName, String qName) throws SAXException {
132
                    path.remove(path.size()-1);
133
            }
134
        });
135

  
136
        return tags;
137
    }
138
    
139
    private List<List<String>> getRecords1(String xmlfile, String recordPath, List<String>fieldPaths) throws Exception {
140
        class ParseRecordsHandler extends DefaultHandler {
141
            Locator locator;
142
            List<String> path = new ArrayList<>();
143
            List<List<String>> records = new ArrayList<>();
144
            Map<String,String> record = new HashMap<>();
145
            StringBuilder value = new StringBuilder();
146
            
147
            @Override
148
            public void setDocumentLocator(Locator locator) {
149
                this.locator = locator;
150
            }
151
            
152
            @Override
153
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
154
                    path.add(localName);
155

  
156
                    int line = this.locator.getLineNumber();
157
                    int column = this.locator.getColumnNumber()-2-localName.length();
158
                    String path_s = StringUtils.join(path, "/");
159

  
160
                    if( StringUtils.equalsIgnoreCase(path_s, recordPath) ) {
161
                        return;
162
                    }
163
                    this.value.setLength(0);
164
//                    this.value.trimToSize();
165
            }
166

  
167
            @Override
168
            public void characters(char[] ch, int start, int length) throws SAXException {
169
                value.append(new String(ch, start, length));
170
            }
171
            
172
            @Override
173
            public void endElement(String uri, String localName, String qName) throws SAXException {
174
                    int line = this.locator.getLineNumber();
175
                    int column = this.locator.getColumnNumber()-2-localName.length();
176
                    String path_s = StringUtils.join(path, "/");
177

  
178
                    if( StringUtils.equalsIgnoreCase(path_s, recordPath) ) {
179
                        List<String> values = new ArrayList<>();
180
                        for (String fieldPath : fieldPaths) {
181
                            values.add(record.get(fieldPath));
182
                        }
183
                        records.add(values);
184
                        record.clear();
185
                    } else {
186
                        for (String fieldPath : fieldPaths) {
187
                            if( StringUtils.equalsIgnoreCase(path_s, fieldPath) ) {
188
                                record.put(fieldPath, this.value.toString());
189
                                // No break to handle repeated columns
190
                            }
191
                        }
192
                    }
193

  
194
                    path.remove(path.size()-1);
195
            }
196
        }
197
        
198
        SAXParserFactory spf = SAXParserFactory.newInstance();
199
        spf.setNamespaceAware(true);
200
        SAXParser saxParser = spf.newSAXParser();
201
//        RandomAccessFileReader reader = new RandomAccessFileReader(new File(xmlfile), StandardCharsets.UTF_8);
202
        Reader reader = openFileReader(new File(xmlfile));
203
        InputSource is = new InputSource(reader);
204

  
205

  
206
        ParseRecordsHandler handler = new ParseRecordsHandler();
207
        
208
        System.out.println("Parse (getRecords): -------------------------");
209
        saxParser.parse(is, handler);
210
        
211
        return handler.records;
212
    }
213

  
214
    private long getRecordCount(String xmlfile, String recordPath) throws Exception {
215
        SAXParserFactory spf = SAXParserFactory.newInstance();
216
        spf.setNamespaceAware(true);
217
        SAXParser saxParser = spf.newSAXParser();
218
        Reader reader = openFileReader(new File(xmlfile));
219
        InputSource is = new InputSource(reader);
220

  
221
        List<String> path = new ArrayList<>();
222
        MutableLong numRecords = new MutableLong(0);
223

  
224
        System.out.println("Parse (countRecords)");
225
        saxParser.parse(is, new DefaultHandler() {
226
            @Override
227
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
228
                    path.add(localName);
229
                    String path_s = StringUtils.join(path, "/");
230

  
231
                    if( StringUtils.equalsIgnoreCase(path_s, recordPath) ) {
232
                        numRecords.increment();
233
                    }
234
            }
235

  
236
            @Override
237
            public void endElement(String uri, String localName, String qName) throws SAXException {
238
                    path.remove(path.size()-1);
239
            }
240
        });
241

  
242
        return numRecords.longValue();
243
    }
244
    
245
//    private void createIndex(String xmlfile, String recordPath) throws Exception {
246
//        
247
//        long countRecords = getRecordCount(xmlfile, recordPath);
248
//        if (countRecords < 1) {
249
//            return;
250
//        }
251
//        RandomAccessFileIndex record_idx = new RandomAccessFileIndex();
252
//        record_idx.create(fileIndex, countRecords);
253
//
254
//        SAXParserFactory spf = SAXParserFactory.newInstance();
255
//        spf.setNamespaceAware(true);
256
//        SAXParser saxParser = spf.newSAXParser();
257
//        Reader reader = openFileReader(new File(xmlfile));
258
//        InputSource is = new InputSource(reader);
259
//        List<String> path = new ArrayList<>();
260
//
261
//        System.out.println("Parse (createIndex)");
262
//        saxParser.parse(is, new DefaultHandler() {
263
//            Locator locator;
264
//            
265
//            @Override
266
//            public void setDocumentLocator(Locator locator) {
267
//                this.locator = locator;
268
//            }
269
//            @Override
270
//            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
271
//                    path.add(localName);
272
//                    int line = this.locator.getLineNumber();
273
//                    int column = this.locator.getColumnNumber()-2-localName.length();
274
//                    String path_s = StringUtils.join(path, "/");
275
//
276
//                    if( StringUtils.equalsIgnoreCase(path_s, recordPath) ) {
277
//                        record_idx.set(lineno++, position);
278
//                    }
279
//            }
280
//
281
//            @Override
282
//            public void endElement(String uri, String localName, String qName) throws SAXException {
283
//                    path.remove(path.size()-1);
284
//            }
285
//        });
286
//
287
//        return numRecords.longValue();
288
//    }
289
    
290
    
291
    
292
    private void test() {
293
    }
294

  
295
}
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/RecordsFile.java
1
package org.gvsig.fmap.dal.store.csv;
2

  
3
import java.io.Closeable;
4
import java.io.File;
5
import java.io.IOException;
6
import java.io.RandomAccessFile;
7
import java.nio.ByteBuffer;
8
import java.sql.Timestamp;
9
import java.util.List;
10
import org.gvsig.fmap.dal.store.csv.RecordsFile.Record;
11
import org.gvsig.tools.util.GetItem;
12
import org.gvsig.tools.util.GetItem64;
13
import org.gvsig.tools.util.Size;
14
import org.gvsig.tools.util.Size64;
15

  
16
/**
17
 *
18
 * @author gvSIG Team
19
 */
20
public interface RecordsFile extends List<Record>, Size, Size64, GetItem<Record>, GetItem64<Record>, Closeable {
21
    
22
    public static final byte TYPE_BYTE = 0;
23
    public static final byte TYPE_SHORT = 1;
24
    public static final byte TYPE_INTEGER = 2;
25
    public static final byte TYPE_LONG = 3;
26
    public static final byte TYPE_DOUBLE = 4;
27
    public static final byte TYPE_TIMESTAMP = 5;
28
    public static final byte TYPE_STRING = 6;
29
    public static final byte TYPE_BYTES = 7;
30

  
31
    public interface Record {
32
        public RecordType getType();
33
        public byte[] getBytes();
34
        public void setBytes(byte[] bytes);
35
        public ByteBuffer getBuffer();
36
        
37
        public int getByte(int n);
38
        public int getShort(int n);
39
        public int getInt(int n);
40
        public long getLong(int n);
41
        public double getDouble(int n);
42
        public Timestamp getTimestamp(int n);
43
        public String getString(int n);        
44
        public byte[] getBytes(int n);        
45

  
46
        public void setByte(int n, byte v);
47
        public void setShort(int n, short v);
48
        public void setInt(int n, int v);
49
        public void setLong(int n, long v);
50
        public void setDouble(int n, double v);
51
        public void setTimestamp(int n, Timestamp v);
52
        public void setString(int n, String v);        
53
        public void setBytes(int n, byte[] v);        
54
    }
55
    
56
    public interface RecordType {
57
        public int getSize();
58
        public int getFieldCount();
59
        public int getFieldType(int n);
60
        public int getFieldSize(int n);
61
        public int getFieldOffset(int n);
62
        public Record createRecord();
63
        public byte[] toBytes();
64
        
65
        public static RecordType from(byte[] bytes) {
66
            return RecordsFileImpl.RecordTypeImpl.from(bytes);
67
        }
68
        
69
        public static RecordType from(ByteBuffer bytes) {
70
            return RecordsFileImpl.RecordTypeImpl.from(bytes);
71
        }
72
    }
73
    
74
    public interface RecordTypeBuilder {
75
        public RecordTypeBuilder addbyte();
76
        public RecordTypeBuilder addShort();
77
        public RecordTypeBuilder addInteger();
78
        public RecordTypeBuilder addLong();
79
        public RecordTypeBuilder addDouble();
80
        public RecordTypeBuilder addTimestamp();
81
        public RecordTypeBuilder addString(int size);
82
        public RecordTypeBuilder addBytes(int size);
83
        public RecordType build();
84
        
85
        public static RecordTypeBuilder recordTypeBuilder() {
86
            return RecordsFileImpl.RecordTypeBuilderImpl.recordTypeBuilder();
87
        }
88
    }
89
    
90
    
91
    public RecordType getRecordType();
92

  
93
    public void open(File f) throws IOException;
94
    
95
    public void open(RandomAccessFile raf) throws IOException;
96

  
97
    public void create(File f, RecordType recordType, long sz) throws IOException;
98
    
99
    public void create(RandomAccessFile raf, RecordType recordType, long sz) throws IOException ;
100
    
101
    public boolean isOpen();
102
      
103
    
104
}
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/XMLFileAsList.java
1
/*
2
 * To change this license header, choose License Headers in Project Properties.
3
 * To change this template file, choose Tools | Templates
4
 * and open the template in the editor.
5
 */
6
package org.gvsig.fmap.dal.store.csv;
7

  
8
import java.io.BufferedReader;
9
import java.io.Closeable;
10
import java.io.File;
11
import java.io.IOException;
12
import java.nio.ByteBuffer;
13
import java.nio.CharBuffer;
14
import java.nio.charset.Charset;
15
import java.util.AbstractList;
16
import java.util.ArrayList;
17
import java.util.HashMap;
18
import java.util.List;
19
import java.util.Map;
20
import java.util.Objects;
21
import javax.xml.parsers.SAXParser;
22
import javax.xml.parsers.SAXParserFactory;
23
import org.apache.commons.io.IOUtils;
24
import org.apache.commons.lang3.StringUtils;
25
import org.gvsig.fmap.dal.store.csv.RecordsFile.Record;
26
import org.gvsig.fmap.dal.store.csv.RecordsFile.RecordType;
27
import static org.gvsig.fmap.dal.store.csv.RecordsFile.RecordTypeBuilder.recordTypeBuilder;
28
import org.gvsig.fmap.dal.store.csv.virtualrows.RandomAccessFileReader;
29
import org.gvsig.tools.util.GetItemWithSize64;
30
import org.xml.sax.Attributes;
31
import org.xml.sax.InputSource;
32
import org.xml.sax.Locator;
33
import org.xml.sax.SAXException;
34
import org.xml.sax.helpers.DefaultHandler;
35

  
36
/**
37
 *
38
 * @author jjdelcerro
39
 */
40
@SuppressWarnings("UseSpecificCatch")
41
public class XMLFileAsList
42
        extends AbstractList<List<String>>
43
        implements Closeable, GetItemWithSize64<List<String>> {
44

  
45
    private static final int IDXFIELD_LINE = 0;
46
    private static final int IDXFIELD_COLUMN = 1;
47
    private static final int IDXFIELD_LINEPOS = 2;
48
    private static final int IDXFIELD_RECORDPOS = 3;
49

  
50
    private final RandomAccessFileReader reader;
51
    private final String recordPath;
52
    private RecordsFile index;
53
    private List<String> fieldPaths;
54

  
55
    /**
     * Creates the list over an XML file without any index attached.
     *
     * @param text       XML file to read
     * @param charset    charset used to decode the file
     * @param recordPath element path that delimits a record
     * @param fieldPaths element paths whose values make up each row
     * @throws IOException if the file cannot be opened
     */
    public XMLFileAsList(File text, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = new RandomAccessFileReader(text, charset);
        this.index = null;
        this.recordPath = recordPath;
        // Previously never stored, which left the field null for get()/get64().
        this.fieldPaths = fieldPaths;
    }
60

  
61
    /**
     * Creates the list over an XML file, reusing the index file when it
     * exists and building it otherwise.
     *
     * @param text       XML file to read
     * @param index      index file; created when it does not exist
     * @param charset    charset used to decode the XML file
     * @param recordPath element path that delimits a record
     * @param fieldPaths element paths whose values make up each row
     * @throws IOException if the file cannot be opened or the index cannot
     *                     be created
     */
    public XMLFileAsList(File text, File index, Charset charset, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = new RandomAccessFileReader(text, charset);
        // Both must be assigned BEFORE createIndex(): the index builder
        // compares every element path against this.recordPath, which would
        // still be null (matching nothing) if assigned afterwards.
        this.recordPath = recordPath;
        this.fieldPaths = fieldPaths;
        if (index.exists()) {
            // TODO: Force to create index if text newer than index
            this.index = new RecordsFileImpl(index);
        } else {
            this.createIndex(index);
        }
    }
71

  
72
    /**
     * Creates the list over an already opened reader and index.
     *
     * @param reader     reader positioned over the XML file
     * @param index      index of record positions
     * @param recordPath element path that delimits a record
     * @param fieldPaths element paths whose values make up each row
     * @throws IOException declared for symmetry with the other constructors
     */
    public XMLFileAsList(RandomAccessFileReader reader, RecordsFile index, String recordPath, List<String> fieldPaths) throws IOException {
        this.reader = reader;
        this.index = index;
        this.recordPath = recordPath;
        // Previously never stored, which left the field null for get()/get64().
        this.fieldPaths = fieldPaths;
    }
77

  
78
    @Override
79
    public void close() {
80
        IOUtils.closeQuietly(this.reader);
81
        IOUtils.closeQuietly(this.index);
82
    }
83

  
84
    private List<String> getRecord(long position, List<String> fieldPaths) {
85
        class StopParserSAXException extends SAXException {
86
        }
87

  
88
        class ParseRecordsHandler extends DefaultHandler {
89

  
90
            Locator locator;
91
            List<String> path = new ArrayList<>();
92
            Map<String, String> record = new HashMap<>();
93
            StringBuilder value = new StringBuilder();
94
            List<String> values = new ArrayList<>();
95

  
96
            @Override
97
            public void setDocumentLocator(Locator locator) {
98
                this.locator = locator;
99
            }
100

  
101
            @Override
102
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
103
                path.add(localName);
104
                String path_s = StringUtils.join(path, "/");
105
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
106
                    return;
107
                }
108
                this.value.setLength(0);
109
//                    this.value.trimToSize();
110
            }
111

  
112
            @Override
113
            public void characters(char[] ch, int start, int length) throws SAXException {
114
                value.append(new String(ch, start, length));
115
            }
116

  
117
            @Override
118
            public void endElement(String uri, String localName, String qName) throws SAXException {
119
                String path_s = StringUtils.join(path, "/");
120

  
121
                if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
122
                    for (String fieldPath : fieldPaths) {
123
                        values.add(record.get(fieldPath));
124
                    }
125
                    throw new StopParserSAXException();
126
                } else {
127
                    for (String fieldPath : fieldPaths) {
128
                        if (StringUtils.equalsIgnoreCase(path_s, fieldPath)) {
129
                            record.put(fieldPath, this.value.toString());
130
                            // No break to handle repeated columns
131
                        }
132
                    }
133
                }
134

  
135
                path.remove(path.size() - 1);
136
            }
137
        }
138

  
139
        ParseRecordsHandler handler = null;
140
        try {
141
            Record record = this.index.get64(position);
142
            long recordPosition = record.getLong(IDXFIELD_RECORDPOS);
143
            this.reader.seek(recordPosition);
144
            BufferedReader breader = new BufferedReader(this.reader, 1024 * 8);
145

  
146
            InputSource is = new InputSource(breader);
147
            SAXParserFactory spf = SAXParserFactory.newInstance();
148
            spf.setNamespaceAware(true);
149
            SAXParser saxParser = spf.newSAXParser();
150
            handler = new ParseRecordsHandler();
151

  
152
            saxParser.parse(is, handler);
153
        } catch (StopParserSAXException ex) {
154
            if (handler != null) {
155
                return handler.values;
156
            }
157
        } catch (Exception ex) {
158
            throw new RuntimeException("Can't parse record " + position, ex);
159
        }
160
        return null; // ?????
161
    }
162

  
163
    @Override
164
    public List<String> get(int index) {
165
        return this.getRecord(index, fieldPaths);
166
    }
167

  
168
    @Override
169
    public List<String> get64(long position) {
170
        return this.getRecord(position, fieldPaths);
171
    }
172

  
173
    @Override
174
    public int size() {
175
        return this.index.size();
176
    }
177

  
178
    @Override
179
    public long size64() {
180
        return this.index.size64();
181
    }
182

  
183
    final public void createIndex(File indexFile) throws IOException {
184
        try {
185
            // 1. Creamos el indice vacio
186
            RecordType recordType = recordTypeBuilder()
187
                    .addLong() // IDXFIELD_LINE
188
                    .addLong() // IDXFIELD_COLUMN
189
                    .addLong() // IDXFIELD_LINEPOS
190
                    .addLong() // IDXFIELD_RECORDPOS
191
                    .build();
192
            Record record = recordType.createRecord();
193
            final RecordsFile theIndex = new RecordsFileImpl();
194
            theIndex.create(indexFile, recordType, 0);
195

  
196
            // 2. Rellenamos los campos numero de linea y columna de cada registro 
197
            //    en el indice.
198
            // Para ello nos recorremos el XML y vamos escribiendo en el indice.
199
            this.reader.rewind();
200
            InputSource is = new InputSource(reader);
201
            SAXParserFactory spf = SAXParserFactory.newInstance();
202
            spf.setNamespaceAware(true);
203
            SAXParser saxParser = spf.newSAXParser();
204

  
205
            List<String> path = new ArrayList<>();
206
            // TODO: Aqui habria que quedarse con la primera aparicion de
207
            // los campos solicitados que no pertenezcan al registro principal.
208
            // Probablemente habria que guardar esos valores en disco junto al
209
            // fichero de indice.
210
            saxParser.parse(is, new DefaultHandler() {
211
                Locator locator;
212

  
213
                @Override
214
                public void setDocumentLocator(Locator locator) {
215
                    this.locator = locator;
216
                }
217

  
218
                @Override
219
                public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
220
                    path.add(localName);
221
                    String path_s = StringUtils.join(path, "/");
222

  
223
                    if (StringUtils.equalsIgnoreCase(path_s, recordPath)) {
224
                        record.setLong(IDXFIELD_LINE, locator.getLineNumber() - 1);
225
                        record.setLong(IDXFIELD_COLUMN, locator.getColumnNumber() - 1);
226
                        record.setLong(IDXFIELD_LINEPOS, 0);
227
                        record.setLong(IDXFIELD_RECORDPOS, 0);
228
                        theIndex.add(record);
229
                    }
230
                }
231

  
232
                @Override
233
                public void endElement(String uri, String localName, String qName) throws SAXException {
234
                    path.remove(path.size() - 1);
235
                }
236
            });
237

  
238
            // 3. Nos recorremos el indice y calculamos la posicion de la linea
239
            //    para cada registro.
240
            this.reader.rewind();
241
            PositionCalculator positionCalculator = new PositionCalculator(reader);
242

  
243
            for (int i = 0; i < theIndex.size(); i++) {
244
                Record r = theIndex.get(i);
245
                positionCalculator.next(r.getLong(IDXFIELD_LINE), r.getLong(IDXFIELD_COLUMN));
246
                r.setLong(IDXFIELD_LINEPOS, positionCalculator.getLinePosition());
247
                r.setLong(IDXFIELD_RECORDPOS, positionCalculator.getColumnPosition());
248
                theIndex.set(i, record);
249
            }
250
            this.index = new RecordsFileImpl(indexFile);
251
        } catch (Exception ex) {
252
            throw new IOException("Can't create index " + Objects.toString(indexFile), ex);
253
        }
254
    }
255

  
256
    private static class CharacterSize {
257

  
258
        private final Charset charset;
259
        private final CharBuffer charBuffer;
260

  
261
        public CharacterSize(Charset charset) {
262
            this.charset = charset;
263
            this.charBuffer = CharBuffer.allocate(1);
264
        }
265

  
266
        public int size(char ch) {
267
            this.charBuffer.put(0, ch);
268
            this.charBuffer.position(0);
269
            ByteBuffer buffer = this.charset.encode(this.charBuffer);
270
            return buffer.limit();
271
        }
272

  
273
        public int size(char[] cbuf, int off, int len) {
274
            CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
275
            ByteBuffer byteBuffer = this.charset.encode(cb);
276
            return byteBuffer.limit();
277
        }
278

  
279
        public int size(char[] cbuf) {
280
            CharBuffer cb = CharBuffer.wrap(cbuf, 0, cbuf.length);
281
            ByteBuffer byteBuffer = this.charset.encode(cb);
282
            return byteBuffer.limit();
283
        }
284

  
285
        public int size(String s) {
286
            CharBuffer cb = CharBuffer.wrap(s);
287
            ByteBuffer byteBuffer = this.charset.encode(cb);
288
            return byteBuffer.limit();
289
        }
290
    }
291

  
292
    private static class PositionCalculator {
293

  
294
        private final RandomAccessFileReader reader;
295
        private final BufferedReader breader;
296
        private long currentPosition;
297
        private long currentColumn;
298
        private long currentLine;
299
        private long currentColumnPosition;
300
        private long currentLinePosition;
301

  
302
        private final char[] ch;
303
        private final CharacterSize characterSize;
304

  
305
        public PositionCalculator(RandomAccessFileReader reader) {
306
            this.reader = reader;
307
            this.breader = new BufferedReader(this.reader, 1024 * 8);
308
            this.currentPosition = this.reader.getCurrentPosition();
309
            this.currentLine = 0;
310
            this.currentColumn = 0;
311
            this.ch = new char[1];
312
            this.characterSize = new CharacterSize(this.reader.getCharset());
313
        }
314

  
315
        public boolean next(long line, long column) throws IOException {
316
            while (this.currentLine < line) {
317
                if (breader.read(this.ch, 0, 1) < 1) {
318
                    return false;
319
                }
320
                char c = ch[0];
321
                this.currentPosition += characterSize.size(c);
322
                if (c == '\n') {
323
                    this.currentLine++;
324
                    this.currentColumn = 0;
325
                }
326
            }
327
            this.currentLinePosition = this.currentPosition;
328
            while (this.currentColumn < column) {
329
                if (breader.read(this.ch, 0, 1) < 1) {
330
                    return false;
331
                }
332
                char c = ch[0];
333
                this.currentPosition += characterSize.size(c);
334
                if (c == '\n') {
335
                    // Uff, esto seria un error, ya que la linea corriente no
336
                    // tiene tantas columnas.
337
                    throw new IOException("Illegal column number " + column + " for line " + line);
338
                }
339
                this.currentColumn++;
340
            }
341
            this.currentColumnPosition = this.currentPosition;
342
            return true;
343
        }
344

  
345
        public long getLinePosition() {
346
            return this.currentLinePosition;
347
        }
348

  
349
        public long getColumnPosition() {
350
            return this.currentColumnPosition;
351
        }
352

  
353
    }
354

  
355
}
trunk/org.gvsig.desktop/org.gvsig.desktop.compat.cdc/org.gvsig.fmap.dal/org.gvsig.fmap.dal.file/org.gvsig.fmap.dal.file.csv/src/main/java/org/gvsig/fmap/dal/store/csv/RecordsFileImpl.java
1
package org.gvsig.fmap.dal.store.csv;
2

  
3
import java.io.File;
4
import java.io.IOException;
5
import java.io.RandomAccessFile;
6
import java.nio.ByteBuffer;
7
import java.nio.MappedByteBuffer;
8
import java.nio.channels.FileChannel;
9
import java.sql.Timestamp;
10
import java.util.AbstractList;
11
import java.util.ArrayList;
12
import java.util.List;
13
import org.apache.commons.io.IOUtils;
14
import org.gvsig.fmap.dal.store.csv.RecordsFile.Record;
15
import static org.gvsig.fmap.dal.store.csv.RecordsFile.TYPE_BYTE;
16
import static org.gvsig.fmap.dal.store.csv.RecordsFile.TYPE_STRING;
17
import static org.gvsig.fmap.dal.store.csv.RecordsFile.TYPE_TIMESTAMP;
18

  
19
/**
20
 *
21
 * @author jjdelcerro
22
 */
23
public class RecordsFileImpl 
24
        extends AbstractList<Record>
25
        implements RecordsFile 
26
    {
27

  
28
    public static class RecordImpl implements Record {
29
        private final RecordType type;
30
        private final ByteBuffer buffer;
31
        
32
        public RecordImpl(RecordType type) {
33
            this.type = type;
34
            this.buffer = ByteBuffer.allocateDirect(this.type.getSize());
35
        }
36
        
37
        @Override
38
        public RecordType getType() {
39
            return this.type;
40
        }
41

  
42
        @Override
43
        public byte[] getBytes() {
44
            return this.buffer.array();
45
        }
46

  
47
        @Override
48
        public ByteBuffer getBuffer() {
49
            return this.buffer;
50
        }
51
        
52
        @Override
53
        public void setBytes(byte[] bytes) {
54
            this.buffer.position(0);
55
            this.buffer.put(bytes);
56
        }
57

  
58
        @Override
59
        public int getInt(int n) {
60
            this.buffer.position(this.type.getFieldOffset(n));
61
            return this.buffer.getInt();
62
        }
63

  
64
        @Override
65
        public long getLong(int n) {
66
            this.buffer.position(this.type.getFieldOffset(n));
67
            return this.buffer.getLong();
68
        }
69

  
70
        @Override
71
        public double getDouble(int n) {
72
            this.buffer.position(this.type.getFieldOffset(n));
73
            return this.buffer.getDouble();
74
        }
75

  
76
        @Override
77
        public Timestamp getTimestamp(int n) {
78
            this.buffer.position(this.type.getFieldOffset(n));
79
            long l = this.buffer.getLong();
80
            return new Timestamp(l);
81
        }
82

  
83
        @Override
84
        public String getString(int n) {
85
            this.buffer.position(this.type.getFieldOffset(n));
86
            short sz = this.buffer.getShort();
87
            char[] chars = new char[sz];
88
            for (int i = 0; i < sz; i++) {
89
                chars[i] = this.buffer.getChar(i);
90
            }
91
            return new String(chars);
92
        }
93

  
94
        @Override
95
        public byte[] getBytes(int n) {
96
            this.buffer.position(this.type.getFieldOffset(n));
97
            int sz = this.buffer.getInt();
98
            byte[] bytes = new byte[sz];
99
            for (int i = 0; i < sz; i++) {
100
                bytes[i] = this.buffer.get(i);
101
            }
102
            return bytes;
103
        }
104

  
105
        @Override
106
        public int getByte(int n) {
107
            this.buffer.position(this.type.getFieldOffset(n));
108
            return this.buffer.get();
109
        }
110

  
111
        @Override
112
        public int getShort(int n) {
113
            this.buffer.position(this.type.getFieldOffset(n));
114
            return this.buffer.getShort();
115
        }
116

  
117
        @Override
118
        public void setByte(int n, byte v) {
119
            this.buffer.position(this.type.getFieldOffset(n));
120
            this.buffer.put(v);
121
        }
122

  
123
        @Override
124
        public void setShort(int n, short v) {
125
            this.buffer.position(this.type.getFieldOffset(n));
126
            this.buffer.putShort(v);
127
        }
128

  
129
        @Override
130
        public void setInt(int n, int v) {
131
            this.buffer.position(this.type.getFieldOffset(n));
132
            this.buffer.putInt(v);
133
        }
134

  
135
        @Override
136
        public void setLong(int n, long v) {
137
            this.buffer.position(this.type.getFieldOffset(n));
138
            this.buffer.putLong(v);
139
        }
140

  
141
        @Override
142
        public void setDouble(int n, double v) {
143
            this.buffer.position(this.type.getFieldOffset(n));
144
            this.buffer.putDouble(v);
145
        }
146

  
147
        @Override
148
        public void setTimestamp(int n, Timestamp v) {
149
            this.buffer.position(this.type.getFieldOffset(n));
150
            this.buffer.putLong(v.getTime());
151
        }
152

  
153
        @Override
154
        public void setString(int n, String v) {
155
            this.buffer.position(this.type.getFieldOffset(n));
156
            this.buffer.putShort((short) v.length());
157
            for (int i = 0; i < v.length(); i++) {
158
                this.buffer.putChar(v.charAt(i));
159
            }
160
        }
161

  
162
        @Override
163
        public void setBytes(int n, byte[] v) {
164
            this.buffer.position(this.type.getFieldOffset(n));
165
            this.buffer.putInt(v.length);
166
            for (int i = 0; i < v.length; i++) {
167
                this.buffer.put(v[i]);
168
            }
169
        }
170
    }
171

  
172
    
173
    public static class RecordTypeImpl implements RecordType {
174

  
175
        private static class FieldType {
176
            public byte type;
177
            public int size;
178
            public int offset;
179
        }
180
        
181
        private final List<FieldType> fields;
182
        private int size;
183
        
184
        public RecordTypeImpl() {
185
            this.fields = new ArrayList<>();
186
            this.size = -1;
187
        }
188

  
189
        public void add(int type, int size) {
190
            FieldType field = new FieldType();
191
            field.size = size;
192
            field.type = (byte) type;
193
            this.size = -1;
194
            field.offset = this.getSize();
195
            this.fields.add(field);
196
        }
197
        
198
        @Override
199
        public int getSize() {
200
            if( this.size<0 ) {
201
                int sz = 0;
202
                for (FieldType field : fields) {
203
                    switch(field.type) {
204
                        case TYPE_BYTE:
205
                            sz += 1;
206
                            break;
207
                        case TYPE_SHORT:
208
                            sz += 2;
209
                            break;
210
                        case TYPE_INTEGER:
211
                            sz += 4;
212
                            break;
213
                        case TYPE_TIMESTAMP:
214
                        case TYPE_LONG:
215
                            sz += 8;
216
                            break;
217
                        case TYPE_DOUBLE:
218
                            sz += 8;
219
                            break;
220
                        case TYPE_STRING:
221
                            sz += 2 + (field.size*2);
222
                            break;
223
                        case TYPE_BYTES:
224
                            sz += 4 + field.size;
225
                            break;
226
                    }
227
                }
228
                this.size = sz;
229
            }
230
            return this.size;
231
        }
232

  
233
        @Override
234
        public int getFieldCount() {
235
            return this.fields.size();
236
        }
237

  
238
        @Override
239
        public int getFieldType(int n) {
240
            return this.fields.get(n).type;
241
        }
242

  
243
        @Override
244
        public int getFieldSize(int n) {
245
            return this.fields.get(n).size;
246
        }
247

  
248
        @Override
249
        public int getFieldOffset(int n) {
250
            return this.fields.get(n).offset;
251
        }
252

  
253
        @Override
254
        public Record createRecord() {
255
            return new RecordImpl(this);
256
        }
257

  
258
        @Override
259
        public byte[] toBytes() {
260
            ByteBuffer buffer = ByteBuffer.allocateDirect(this.fields.size()*(1+4));
261
            for (FieldType field : fields) {
262
                buffer.put(field.type);
263
                buffer.putInt(field.size);
264
            }
265
            return buffer.array();
266
        }
267
        
268
        public static RecordType from(ByteBuffer bytes) {
269
            RecordTypeImpl recordType = new RecordTypeImpl();
270
            
271
            bytes.position(0);
272
            int len = bytes.limit()/(1+4);
273
            for (int i = 0; i < len; i++) {
274
                int type = bytes.get();
275
                int size = bytes.getInt();
276
                recordType.add(type, size);
277
            }
278
            return recordType;
279
        }
280
        
281
        public static RecordType from(byte[] bytes) {
282
            ByteBuffer buffer = ByteBuffer.allocateDirect(bytes.length);
283
            buffer.put(bytes);
284
            return from(buffer);
285
        }
286
        
287
    }
288
    
289
    public static class RecordTypeBuilderImpl implements RecordTypeBuilder {
290

  
291
        private final RecordTypeImpl recordType;
292
        
293
        public RecordTypeBuilderImpl() {
294
            this.recordType = new RecordTypeImpl();
295
        }
296
        
297
        @Override
298
        public RecordTypeBuilder addbyte() {
299
            this.recordType.add(TYPE_BYTE, 0);
300
            return this;
301
        }
302

  
303
        @Override
304
        public RecordTypeBuilder addShort() {
305
            this.recordType.add(TYPE_SHORT, 0);
306
            return this;
307
        }
308

  
309
        @Override
310
        public RecordTypeBuilder addInteger() {
311
            this.recordType.add(TYPE_INTEGER, 0);
312
            return this;
313
        }
314

  
315
        @Override
316
        public RecordTypeBuilder addLong() {
317
            this.recordType.add(TYPE_LONG, 0);
318
            return this;
319
        }
320

  
321
        @Override
322
        public RecordTypeBuilder addDouble() {
323
            this.recordType.add(TYPE_DOUBLE, 0);
324
            return this;
325
        }
326

  
327
        @Override
328
        public RecordTypeBuilder addTimestamp() {
329
            this.recordType.add(TYPE_TIMESTAMP, 0);
330
            return this;
331
        }
332

  
333
        @Override
334
        public RecordTypeBuilder addString(int size) {
335
            this.recordType.add(TYPE_STRING, size);
336
            return this;
337
        }
338

  
339
        @Override
340
        public RecordTypeBuilder addBytes(int size) {
341
            this.recordType.add(TYPE_BYTES, size);
342
            return this;
343
        }
344

  
345
        @Override
346
        public RecordType build() {
347
            return this.recordType;
348
        }
349
        
350
        public static RecordTypeBuilder recordTypeBuilder() {
351
            return new RecordTypeBuilderImpl();
352
        }
353

  
354
    }
355
    
356
    private static final int HEADER_SIZE = 1024;
357
    
358
    private RandomAccessFile raf;
359
    private MappedByteBuffer fileByteBuffer;
360
    private long sz;
361
    private int header_size;
362
    private int type_size;
363
    private RecordType recordType;
364

  
365
    /**
     * Creates an instance that is not attached to any file yet; one of the
     * {@code open} or {@code create} methods has to be called before use.
     */
    public RecordsFileImpl() {
        // Nothing to initialize.
    }
368

  
369
    /**
     * Creates an instance and opens the given file through
     * {@link #open(File)}.
     *
     * @param f file to open
     * @throws IOException if the file cannot be opened
     */
    @SuppressWarnings("OverridableMethodCallInConstructor")
    public RecordsFileImpl(File f) throws IOException {
        open(f);
    }
373
    
374
    /**
     * Creates an instance and opens the given file through
     * {@link #open(RandomAccessFile)}.
     *
     * @param raf already opened file
     * @throws IOException if the file cannot be mapped or read
     */
    @SuppressWarnings("OverridableMethodCallInConstructor")
    public RecordsFileImpl(RandomAccessFile raf) throws IOException {
        open(raf);
    }
378
    
379
    @Override
380
    public RecordType getRecordType() {
381
        return this.recordType;
382
    }
383
    
384
    @Override
385
    public void open(File f) throws IOException {
386
        RandomAccessFile theRaf = new RandomAccessFile(f,"r");
387
        this.open(theRaf);
388
    }
389
    
390
    @Override
391
    public void open(RandomAccessFile raf) throws IOException {
392
        this.raf = raf;
393
        this.fileByteBuffer = this.raf.getChannel().map(
394
                FileChannel.MapMode.READ_ONLY, 
395
                0, 
396
                this.raf.length()
397
        );
398
        this.header_size = this.fileByteBuffer.getInt();
399
        this.type_size = this.fileByteBuffer.getInt();
400
        this.recordType = RecordType.from(this.fileByteBuffer.get(new byte[type_size]));
401
        this.sz = (fileByteBuffer.limit()-this.header_size) / this.recordType.getSize();
402
        // TODO: meter en la cabecera el numero de registros para poder escribir datos al final
403
    }
404

  
405
    @Override
406
    public void create(File f, RecordType recordType, long sz) throws IOException {
407
        RandomAccessFile theRaf = new RandomAccessFile(f,"rw");
408
        this.create(theRaf,recordType, sz);
409
    }
410
    
411
    @Override
412
    public void create(RandomAccessFile raf, RecordType recordType, long sz) throws IOException {
413
        this.raf = raf;
414
        this.recordType = recordType;
415
        this.header_size = HEADER_SIZE;
416
        this.raf.setLength(this.header_size +(sz*this.recordType.getSize()));
417
        this.fileByteBuffer = this.raf.getChannel().map(
418
                FileChannel.MapMode.READ_WRITE, 
419
                0, 
420
                this.raf.length()
421
        );
422
        this.sz = (fileByteBuffer.limit()-this.header_size) / this.recordType.getSize();
423
        byte[] recordTypeBytes = this.recordType.toBytes();
424
        this.fileByteBuffer.position(0);
425
        this.fileByteBuffer.putLong(this.header_size);
426
        this.fileByteBuffer.putLong(recordTypeBytes.length);
427
        this.fileByteBuffer.put(recordTypeBytes);
428
    }
429

  
430
    @Override
431
    public void close() throws IOException {
432
        this.fileByteBuffer.force();
433
        IOUtils.closeQuietly(this.raf);
434
        this.raf = null;
435
        this.fileByteBuffer = null;
436
        this.sz = -1;
437
    }
438

  
439
    @Override
440
    public boolean isOpen() {
441
        return this.fileByteBuffer!=null;
442
    }
443
      
444
    @Override
445
    public Record get(int position) {
446
        position = checkIndex(position);
447
        Record record = this.recordType.createRecord();
448
        this.fileByteBuffer.position(this.header_size+(position*this.recordType.getSize()));
449
        this.fileByteBuffer.get(record.getBytes());
450
        return record;
451
    }
452

  
453
    @Override
454
    public Record get64(long position) {
455
        position = checkIndex(position);
456
        Record record = this.recordType.createRecord();
457
        this.fileByteBuffer.position((int) (this.header_size+(position*this.recordType.getSize())));
458
        this.fileByteBuffer.get(record.getBytes());
459
        return record;
460
    }
461

  
462
    @Override
463
    public int size() {
464
        return (int) this.sz;
465
    }
466

  
467
    @Override
468
    public long size64() {
469
        return this.sz;
470
    }
471

  
472
    @Override
473
    public Record set(int position, Record record) {
474
        position = checkIndex(position);
475
        this.fileByteBuffer.position((int) (this.header_size+(position*this.recordType.getSize())));
476
        this.fileByteBuffer.put(record.getBytes());
477
        return record;
478
    }
479
    
480
    private int checkIndex(long index) {
481
        if (this.fileByteBuffer == null) {
482
            throw new IllegalStateException("Index not open");
483
        }
484
        if( index < 0 ) {
485
            index = ((int)this.sz) + index;
486
        }
487
        if( index<0 || index>=this.sz) {
488
            throw new IllegalArgumentException("Index out of range ("+index+")");
489
        }
490
        return (int) index;
491
    }
492

  
493
    @Override
494
    public void add(int index, Record record) {
495
        // TODO: implement add method
496
        throw new UnsupportedOperationException();
497
    }
498
}

Also available in: Unified diff