svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / gml / TestXML.java @ 47638
History | View | Annotate | Download (14.6 KB)
1 |
package org.gvsig.fmap.dal.store.gml; |
---|---|
2 |
|
3 |
import java.io.File; |
4 |
import java.io.FileInputStream; |
5 |
import java.io.InputStreamReader; |
6 |
import java.io.Reader; |
7 |
import java.nio.charset.Charset; |
8 |
import java.nio.charset.StandardCharsets; |
9 |
import java.util.ArrayList; |
10 |
import java.util.Collections; |
11 |
import java.util.HashMap; |
12 |
import java.util.HashSet; |
13 |
import java.util.Iterator; |
14 |
import java.util.LinkedHashMap; |
15 |
import java.util.LinkedHashSet; |
16 |
import java.util.List; |
17 |
import java.util.Map; |
18 |
import java.util.Set; |
19 |
import javax.xml.parsers.SAXParser; |
20 |
import javax.xml.parsers.SAXParserFactory; |
21 |
import org.apache.commons.io.FilenameUtils; |
22 |
import org.apache.commons.lang3.StringUtils; |
23 |
import org.apache.commons.lang3.mutable.MutableLong; |
24 |
import org.gvsig.tools.namestranslator.NamesTranslator; |
25 |
import org.xml.sax.Attributes; |
26 |
import org.xml.sax.InputSource; |
27 |
import org.xml.sax.Locator; |
28 |
import org.xml.sax.SAXException; |
29 |
import org.xml.sax.helpers.DefaultHandler; |
30 |
|
31 |
/**
 * Scratch utility that scans an XML file with SAX and prints a relational-style
 * outline of it: every element path that has direct children (sub-elements or
 * attributes) is reported as a "table", followed by its key columns and fields.
 *
 * <p>Attribute paths are recorded as {@code <element path>/#<attribute name>}.
 */
public class TestXML {

    private static final String XMLFILE1 = "/home/jjdelcerro/datos/geodata/vector/sigpac/2018/Declaracion.xml";
    private static final String XMLFILE2 = "/home/jjdelcerro/datos/geodata/vector/ARENA2/quincenas-0/TV_03_2019_01_Q1/victimas.xml";
    private static final String XMLFILE_BIG = "/home/jjdelcerro/datos/geodata/vector/RSUPAC/2020/BDA_RSU_PAC20_1713052020_001.XML";

    /**
     * Prints the table outline of an XML file to standard output.
     *
     * @param args optional; when {@code args[0]} is present it is used as the
     *             path of the XML file to scan, otherwise {@link #XMLFILE_BIG}
     *             is scanned (the original hard-coded behavior)
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        TestXML t = new TestXML();

        String xmlfile = (args != null && args.length > 0) ? args[0] : XMLFILE_BIG;

        List<String> tags = new ArrayList<>(t.extractTags(xmlfile));
        Collections.sort(tags);

//        System.out.println("Tags: -------------------------");
//        for (String tag : tags) {
//            System.out.println(tag);
//        }

        Map<String, Set<String>> tables_tmp = collectDirectChildren(tags);

        Map<String, String> tableNames = new HashMap<>();
//        NamesTranslator nt = NamesTranslator.createTrimTranslator(45);
        for (String fullTableName : tables_tmp.keySet()) {
//            int i = nt.addSource(fullTableName);
//            tableNames.put(tableName,nt.getTranslation(i));
            tableNames.put(fullTableName, lastPathSegment(fullTableName));
        }

        Map<String, List<String>> tables = buildTableColumns(tables_tmp);

        int n = 1;
        for (Map.Entry<String, List<String>> entry : tables.entrySet()) {
            String tableName = entry.getKey();
            System.out.println("TABLE " + tableNames.get(tableName) + " (" + n + ", " + tableName + ")");
            for (String field : entry.getValue()) {
                System.out.println(" " + field);
            }
            n++;
        }

//        List<List<String>> records = t.getRecords1(
//                XMLFILE1,
//                "DECLARACION/LINEA_DECLARACION",
//                ListBuilder.create(
//                    "DECLARACION/LINEA_DECLARACION/PROV",
////                    "DECLARACION/LINEA_DECLARACION/MUN_INE",
////                    "DECLARACION/LINEA_DECLARACION/MUN_CAT",
////                    "DECLARACION/LINEA_DECLARACION/AGREGADO",
//                    "DECLARACION/LINEA_DECLARACION/ZONA",
//                    "DECLARACION/LINEA_DECLARACION/POLIGONO",
//                    "DECLARACION/LINEA_DECLARACION/PARCELA",
//                    "DECLARACION/LINEA_DECLARACION/RECINTO",
//                    "DECLARACION/LINEA_DECLARACION/PARCELA_AGRICOLA",
//                    "DECLARACION/LINEA_DECLARACION/CULTIVO",
////                    "DECLARACION/LINEA_DECLARACION/WKT",
//                    "DECLARACION/LINEA_DECLARACION/DN_SURFACE",
//                    "DECLARACION/LINEA_DECLARACION/SUPERFICIE_DECLARADA"
////                    "DECLARACION/LINEA_DECLARACION/FC_ALMENDROS",
////                    "DECLARACION/LINEA_DECLARACION/FC_ALGARROBOS",
////                    "DECLARACION/LINEA_DECLARACION/FC_AVELLANOS",
////                    "DECLARACION/LINEA_DECLARACION/FC_NOGALES",
////                    "DECLARACION/LINEA_DECLARACION/FC_PISTACHOS",
////                    "DECLARACION/LINEA_DECLARACION/FC_TOTAL"
//                )
//        );
//        for (List<String> record : records) {
//            System.out.println(StringUtils.join(record, ","));
//        }
    }

    /**
     * Maps every path that has at least one direct child to the names of those
     * children, keeping the iteration order of {@code tags}.
     *
     * <p>A child is matched on the prefix {@code parent + "/"} so that a sibling
     * such as {@code A/BC} is never mistaken for a child of {@code A/B}.
     */
    private static Map<String, Set<String>> collectDirectChildren(List<String> tags) {
        Map<String, Set<String>> childrenByPath = new LinkedHashMap<>();
        for (String parent : tags) {
            String prefix = parent + "/";
            Set<String> children = new HashSet<>();
            for (String candidate : tags) {
                if (!candidate.startsWith(prefix)) {
                    continue;
                }
                String childName = candidate.substring(prefix.length());
                // Only direct children: deeper descendants still contain a separator.
                if (!childName.isEmpty() && !childName.contains("/")) {
                    children.add(childName);
                }
            }
            if (!children.isEmpty()) {
                childrenByPath.put(parent, children);
            }
        }
        return childrenByPath;
    }

    /**
     * Builds the sorted column list of every table: one {@code $ID_} key column
     * per path segment (the last one marked PK, the ancestors FK) followed by the
     * table's own fields; a field that is itself a table becomes an {@code $ID_}
     * reference instead of a plain field.
     */
    private static Map<String, List<String>> buildTableColumns(Map<String, Set<String>> childrenByPath) {
        Map<String, List<String>> tables = new LinkedHashMap<>();
        for (Map.Entry<String, Set<String>> entry : childrenByPath.entrySet()) {
            String tablePath = entry.getKey();
            List<String> columns = new ArrayList<>();

            String[] segments = tablePath.split("/");
            int last = segments.length - 1;
            for (int i = 0; i < segments.length; i++) {
                columns.add("$ID_" + segments[i] + (i == last ? ", PK" : ", FK"));
            }
            for (String fieldName : entry.getValue()) {
                if (childrenByPath.containsKey(tablePath + "/" + fieldName)) {
                    // NOTE(review): no comma before FK here, unlike the key columns
                    // above; kept as in the original output - confirm whether intended.
                    columns.add("$ID_" + fieldName + " FK");
                } else {
                    columns.add(fieldName);
                }
            }
            Collections.sort(columns);
            tables.put(tablePath, columns);
        }
        return tables;
    }

    /**
     * Returns the text after the last {@code '/'} of a tag path, or the whole
     * path when it has no separator. Unlike a file "base name", dots are kept,
     * because they are legal characters in XML names.
     */
    private static String lastPathSegment(String path) {
        int slash = path.lastIndexOf('/');
        return slash < 0 ? path : path.substring(slash + 1);
    }

    /** Creates a namespace-aware SAX parser. */
    private static SAXParser newNamespaceAwareParser() throws Exception {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        return spf.newSAXParser();
    }

    /**
     * Opens {@code xmlfile} as a character stream decoded as UTF-8.
     *
     * <p>The caller owns the returned reader and must close it.
     * NOTE(review): the charset is hard-coded; because the parser is handed a
     * character stream it will not apply the encoding named in the XML
     * declaration, so non UTF-8 files are presumably mis-decoded - confirm.
     *
     * @param xmlfile file to open
     * @return a reader over the file contents
     * @throws Exception if the file cannot be opened
     */
    private Reader openFileReader(File xmlfile) throws Exception {
        FileInputStream fis = new FileInputStream(xmlfile);
//        EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector();
//        Charset encoding = encodingDetector.detect(fis, null);
        Charset encoding = StandardCharsets.UTF_8;
        return new InputStreamReader(fis, encoding);
    }

    /**
     * Collects, in document order and without duplicates, the slash-separated
     * path of every element in the file plus one {@code path/#name} entry per
     * attribute found on it.
     *
     * @param xmlfile path of the XML file to scan
     * @return the distinct element and attribute paths, in first-seen order
     * @throws Exception if the file cannot be opened or parsed
     */
    private Set<String> extractTags(String xmlfile) throws Exception {
        SAXParser saxParser = newNamespaceAwareParser();

        List<String> path = new ArrayList<>();
        Set<String> tags = new LinkedHashSet<>();

//        RandomAccessFileReader reader = new RandomAccessFileReader(new File(xmlfile), StandardCharsets.UTF_8);
        // try-with-resources: the file is released even when parsing fails.
        try (Reader reader = openFileReader(new File(xmlfile))) {
//            System.out.println("Parse (extract-tags): -------------------------");
            saxParser.parse(new InputSource(reader), new DefaultHandler() {
                @Override
                public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                    path.add(localName);
                    String path_s = String.join("/", path);

                    tags.add(path_s);
                    for (int i = 0; i < attributes.getLength(); i++) {
                        tags.add(path_s + "/#" + attributes.getLocalName(i));
                    }
                }

                @Override
                public void endElement(String uri, String localName, String qName) throws SAXException {
                    path.remove(path.size() - 1);
                }
            });
        }
        return tags;
    }

    /**
     * Reads the file and returns one row per element whose path equals
     * {@code recordPath} (compared ignoring case). Each row holds, in the order
     * of {@code fieldPaths}, the text collected for the element at that path
     * inside the record, or {@code null} when the record had no such element.
     *
     * @param xmlfile    path of the XML file to read
     * @param recordPath slash-separated path of the record element
     * @param fieldPaths slash-separated paths of the elements to extract
     * @return the extracted rows, in document order
     * @throws Exception if the file cannot be opened or parsed
     */
    private List<List<String>> getRecords1(String xmlfile, String recordPath, List<String> fieldPaths) throws Exception {
        class ParseRecordsHandler extends DefaultHandler {
            final List<String> path = new ArrayList<>();
            final List<List<String>> records = new ArrayList<>();
            final Map<String, String> record = new HashMap<>();
            final StringBuilder value = new StringBuilder();

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                path.add(localName);
                if (String.join("/", path).equalsIgnoreCase(recordPath)) {
                    return;
                }
                // A new (non-record) element starts: discard text gathered so far.
                this.value.setLength(0);
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                value.append(ch, start, length);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                String path_s = String.join("/", path);

                if (path_s.equalsIgnoreCase(recordPath)) {
                    List<String> values = new ArrayList<>();
                    for (String fieldPath : fieldPaths) {
                        values.add(record.get(fieldPath));
                    }
                    records.add(values);
                    record.clear();
                } else {
                    for (String fieldPath : fieldPaths) {
                        if (path_s.equalsIgnoreCase(fieldPath)) {
                            record.put(fieldPath, this.value.toString());
                            // No break to handle repeated columns
                        }
                    }
                }

                path.remove(path.size() - 1);
            }
        }

        SAXParser saxParser = newNamespaceAwareParser();
        ParseRecordsHandler handler = new ParseRecordsHandler();

//        RandomAccessFileReader reader = new RandomAccessFileReader(new File(xmlfile), StandardCharsets.UTF_8);
        try (Reader reader = openFileReader(new File(xmlfile))) {
            System.out.println("Parse (getRecords): -------------------------");
            saxParser.parse(new InputSource(reader), handler);
        }
        return handler.records;
    }

    /**
     * Counts the elements of the file whose path equals {@code recordPath}
     * (compared ignoring case).
     *
     * @param xmlfile    path of the XML file to read
     * @param recordPath slash-separated path of the record element
     * @return number of matching elements
     * @throws Exception if the file cannot be opened or parsed
     */
    private long getRecordCount(String xmlfile, String recordPath) throws Exception {
        class CountRecordsHandler extends DefaultHandler {
            final List<String> path = new ArrayList<>();
            long numRecords = 0;

            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
                path.add(localName);
                if (String.join("/", path).equalsIgnoreCase(recordPath)) {
                    numRecords++;
                }
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                path.remove(path.size() - 1);
            }
        }

        SAXParser saxParser = newNamespaceAwareParser();
        CountRecordsHandler handler = new CountRecordsHandler();

        try (Reader reader = openFileReader(new File(xmlfile))) {
            System.out.println("Parse (countRecords)");
            saxParser.parse(new InputSource(reader), handler);
        }
        return handler.numRecords;
    }

//    Unfinished draft, kept for reference:
//
//    private void createIndex(String xmlfile, String recordPath) throws Exception {
//
//        long countRecords = getRecordCount(xmlfile, recordPath);
//        if (countRecords < 1) {
//            return;
//        }
//        RandomAccessFileIndex record_idx = new RandomAccessFileIndex();
//        record_idx.create(fileIndex, countRecords);
//
//        SAXParserFactory spf = SAXParserFactory.newInstance();
//        spf.setNamespaceAware(true);
//        SAXParser saxParser = spf.newSAXParser();
//        Reader reader = openFileReader(new File(xmlfile));
//        InputSource is = new InputSource(reader);
//        List<String> path = new ArrayList<>();
//
//        System.out.println("Parse (createIndex)");
//        saxParser.parse(is, new DefaultHandler() {
//            Locator locator;
//
//            @Override
//            public void setDocumentLocator(Locator locator) {
//                this.locator = locator;
//            }
//            @Override
//            public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
//                path.add(localName);
//                int line = this.locator.getLineNumber();
//                int column = this.locator.getColumnNumber()-2-localName.length();
//                String path_s = StringUtils.join(path, "/");
//
//                if( StringUtils.equalsIgnoreCase(path_s, recordPath) ) {
//                    record_idx.set(lineno++, position);
//                }
//            }
//
//            @Override
//            public void endElement(String uri, String localName, String qName) throws SAXException {
//                path.remove(path.size()-1);
//            }
//        });
//
//        return numRecords.longValue();
//    }

    /** Empty placeholder; does nothing. */
    private void test() {
    }

}