Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.plugin / org.gvsig.xml2db / org.gvsig.xml2db.lib / org.gvsig.xml2db.lib.impl / src / main / java / org / gvsig / xml2db / lib / impl / StructureExtractorImpl.java @ 47336

History | View | Annotate | Download (21.7 KB)

1
/*
2
 * To change this license header, choose License Headers in Project Properties.
3
 * To change this template file, choose Tools | Templates
4
 * and open the template in the editor.
5
 */
6
package org.gvsig.xml2db.lib.impl;
7

    
8
import org.gvsig.xml2db.lib.impl.xmlinfo.XMLAttributeInfoImpl;
9
import org.gvsig.xml2db.lib.impl.xmlinfo.XMLTableInfoImpl;
10
import java.io.File;
11
import java.io.FileNotFoundException;
12
import java.io.IOException;
13
import java.io.InputStream;
14
import java.io.Reader;
15
import java.nio.charset.Charset;
16
import java.util.ArrayList;
17
import java.util.List;
18
import java.util.Locale;
19
import javax.xml.parsers.SAXParser;
20
import javax.xml.parsers.SAXParserFactory;
21
import org.apache.commons.lang3.ArrayUtils;
22
import org.apache.commons.lang3.StringUtils;
23
import org.apache.tika.utils.CharsetUtils;
24
import org.cresques.cts.IProjection;
25
import org.gvsig.fmap.crs.CRSFactory;
26
import org.gvsig.fmap.dal.DALLocator;
27
import org.gvsig.fmap.dal.DataManager;
28
import org.gvsig.fmap.dal.DataTypes;
29
import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor;
30
import org.gvsig.fmap.dal.feature.EditableFeatureType;
31
import org.gvsig.fmap.dal.feature.EditableForeingKey;
32
import org.gvsig.fmap.dal.feature.FeatureAttributeDescriptor;
33
import org.gvsig.tools.dynobject.DynField_v2;
34
import org.gvsig.tools.task.SimpleTaskStatus;
35
import org.gvsig.xml2db.lib.api.xmlinfo.XMLAttributeInfo;
36
import org.gvsig.xml2db.lib.api.xmlinfo.XMLInfo;
37
import org.gvsig.xml2db.lib.api.xmlinfo.XMLTableInfo;
38
import org.gvsig.xml2db.lib.impl.xmlinfo.XMLInfoImpl;
39
import org.xml.sax.Attributes;
40
import org.xml.sax.InputSource;
41
import org.xml.sax.Locator;
42
import org.xml.sax.SAXException;
43
import org.xml.sax.helpers.DefaultHandler;
44

    
45
/**
46
 *
47
 * @author jjdelcerro
48
 */
49
public class StructureExtractorImpl {
50

    
51
    @SuppressWarnings("UseSpecificCatch")
52
    private void extractTags(XMLInfoImpl xmlinfo, Reader reader, SimpleTaskStatus status) {
53
        if( reader == null ) {
54
            throw new IllegalArgumentException("reader is null");
55
        }
56
        try {
57
            final DataManager dataManager = DALLocator.getDataManager();
58
            
59
            status.message("Reading xml 1/5...");
60
            status.setRangeOfValues(0, xmlinfo.getCountLines());
61
            
62
            SAXParserFactory spf = SAXParserFactory.newInstance();
63
            spf.setNamespaceAware(true);
64
            SAXParser saxParser = spf.newSAXParser();
65
            InputSource is = new InputSource(reader);
66
            
67
            List<String> path = new ArrayList<>();
68
            
69
            saxParser.parse(is, new DefaultHandler() {
70
                private Locator locator;
71
                int size;
72
                int refreshInterval = 1;
73
                StringBuilder chars = new StringBuilder();
74
                
75
                @Override
76
                public void setDocumentLocator(Locator locator) {
77
                    this.locator = locator;
78
                }
79
                
80
                @Override
81
                public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
82
                    int line = this.locator.getLineNumber();
83
                    int column = this.locator.getColumnNumber()-2-localName.length();
84

    
85
                    if(line % refreshInterval == 0) {
86
                        status.setCurValue(line);
87
                    }
88
                    
89
                    if(line > 100){
90
                        refreshInterval = 100;
91
                    } else if(line > 1000){
92
                        refreshInterval = 1000;
93
                    } else if(line > 10000){
94
                        refreshInterval = 10000;
95
                    } else if(line > 100000){
96
                        refreshInterval = 100000;
97
                    }
98

    
99
                    String idvalue = dataManager.createUniqueID();
100
                    
101
                    path.add(localName);
102
                    String path_s = StringUtils.join(path, "/");
103
                    XMLAttributeInfoImpl info = xmlinfo.getTag(path_s);
104
                    if( info == null ) {
105
                        info = new XMLAttributeInfoImpl(xmlinfo.getLocale(), path_s);
106
                        xmlinfo.addTag(info);
107
                        
108
                    }
109
                    if( path.size()>1 ) {
110
                        List<String> parentpath = path.subList(0, path.size()-1);
111
                        String parentpath_s = StringUtils.join(parentpath, "/");
112
                        XMLAttributeInfoImpl parentinfo = xmlinfo.getTag(parentpath_s);
113
                        parentinfo.incrChildCount(localName);
114
                        parentinfo.setLastChildID(localName, idvalue);
115
                    }
116
                    
117
                    for (int i = 0; i < attributes.getLength(); i++) {
118
                        String name = attributes.getLocalName(i);                        
119
                        String value = attributes.getValue(i);
120
                        String idvalueChild = dataManager.createUniqueID();
121
                        XMLAttributeInfoImpl infoChild = xmlinfo.getTag(path_s+"/"+name);
122
                        if( infoChild == null ) {
123
                            infoChild = new XMLAttributeInfoImpl(xmlinfo.getLocale(), path_s+"/"+name);
124
                            xmlinfo.addTag(infoChild);
125
                        }
126
                        info.incrChildCount(infoChild.getName());
127
                        info.setLastChildID(infoChild.getName(), idvalueChild);
128
                        infoChild.addValue(value);
129
                        if( xmlinfo.getSrid() ==  null && StringUtils.containsIgnoreCase(infoChild.getName(), "srid") ) {
130
                            IProjection proj = getProyection(value);
131
                            if( proj!=null ) {
132
                                xmlinfo.setSrid(proj);
133
                            }
134
                        }
135
                    }
136
                    chars.setLength(0);
137
                }
138
                
139
                @Override
140
                public void endElement(String uri, String localName, String qName) throws SAXException {
141
                    int line = this.locator.getLineNumber();
142

    
143
//                    status.setCurValue(line);
144

    
145
                    String path_s = StringUtils.join(path, "/");
146
                    XMLAttributeInfoImpl info = xmlinfo.getTag(path_s);
147
                    if( info == null ) {
148
                        info = new XMLAttributeInfoImpl(xmlinfo.getLocale(), path_s);
149
                        xmlinfo.addTag(info);
150
                    }
151

    
152
                    XMLAttributeInfoImpl parentinfo = null;
153
                    if( path.size()>1 ) {
154
                        List<String> parentpath = path.subList(0, path.size()-1);
155
                        String parentpath_s = StringUtils.join(parentpath, "/");
156
                        parentinfo = xmlinfo.getTag(parentpath_s);
157
                    }
158
                    
159
                    if( info.hasChilds() || (parentinfo != null && (parentinfo.getChildsCount(localName)>1))) {
160
                        String value = this.chars.toString();
161
                        if( StringUtils.isNotBlank(value) ) {
162
                            String name = info.getName()+"$v";                        
163
                            String idvalueChild = dataManager.createUniqueID();
164
                            XMLAttributeInfoImpl infoChild = xmlinfo.getTag(path_s+"/"+name);
165
                            if( infoChild == null ) {
166
                                infoChild = new XMLAttributeInfoImpl(xmlinfo.getLocale(), path_s+"/"+name);
167
                                xmlinfo.addTag(infoChild);
168
                            }
169
                            info.incrChildCount(infoChild.getName());
170
                            info.setLastChildID(infoChild.getName(), idvalueChild);
171
                            infoChild.addValue(value);
172
                        }
173
                    } else {
174
                        String value = this.chars.toString();
175
                        info.addValue(value);
176
                        if( StringUtils.containsIgnoreCase(info.getName(), "srid") ) {
177
                            IProjection proj = getProyection(value);
178
                            if( proj!=null ) {
179
                                xmlinfo.setSrid(proj);
180
                            }
181
                        }
182
                    }
183
                    info.consolidateChildCounters();
184
//                    if( StringUtils.equalsIgnoreCase("LUGAR_CIRCULABA", info.getName()) ) {
185
//                        System.out.println("Oh!");
186
//                    }
187
                    
188
                    path.remove(path.size()-1);
189
                    chars.setLength(0);
190
                }
191
                
192
                @Override
193
                public void characters(char[] ch, int start, int length) throws SAXException {
194
                    int line = this.locator.getLineNumber();
195

    
196
//                    status.setCurValue(line);
197
                    this.chars.append(ch, start, length);
198
                }
199
                
200
                
201
            });
202
        } catch (Exception ex) {
203
            throw new RuntimeException("Can't extract tags.", ex);
204
        }
205
    }
206
    
207
    private IProjection getProyection(String value) {
208
        if( StringUtils.isBlank(value) ) {
209
            return null;
210
        }
211
        IProjection proj = null;
212
        try {
213
            proj = CRSFactory.getCRS(value);
214
        } catch(Throwable t) {
215
            
216
        }
217
        if( proj != null ) {
218
            return proj;
219
        }
220
        try {
221
            proj = CRSFactory.getCRS("EPSG:"+value);
222
        } catch(Throwable t) {
223
            
224
        }
225
        return proj;
226
    }
227
    
228
    public XMLInfo extractStructure(File xml, Charset charset, IProjection projection, Locale locale, SimpleTaskStatus status) throws FileNotFoundException, IOException {
229
        XMLInfoImpl xmlinfo = new XMLInfoImpl();
230
        xmlinfo.setLocale(locale);
231
        xmlinfo.setSrid(projection);
232
        long count = Xml2dbCommons.countLines(xml, charset, status);
233
        xmlinfo.setCountLines(count);
234
        InputSource is = Xml2dbCommons.openReader(xml,charset);
235
        return extractStructure(is, xmlinfo, status);
236
    }
237
    
238
    public XMLInfo extractStructure(InputStream xml, Charset charset, IProjection projection, Locale locale, SimpleTaskStatus status) throws IOException  {
239
        XMLInfoImpl xmlinfo = new XMLInfoImpl();
240
        xmlinfo.setLocale(locale);
241
        xmlinfo.setSrid(projection);
242
        long count = Xml2dbCommons.countLines(xml, charset, status);
243
        xmlinfo.setCountLines(count);
244
        InputSource is = Xml2dbCommons.openReader(xml, charset);
245
        return extractStructure(is, xmlinfo, status);
246
    }
247
    
248
    public XMLInfo extractStructure(Reader reader, IProjection projection, Locale locale, SimpleTaskStatus status) {
249
        XMLInfoImpl xmlinfo = new XMLInfoImpl();
250
        xmlinfo.setLocale(locale);
251
        xmlinfo.setSrid(projection);
252
        InputSource is = new InputSource(reader);
253
        return extractStructure(is, xmlinfo, status);
254
    }
255
    
256
    public XMLInfo extractStructure(InputSource is, XMLInfoImpl xmlinfo, SimpleTaskStatus status) {
257
        
258
        if( xmlinfo.getCharset()==null ) {
259
            xmlinfo.setCharset(CharsetUtils.forName(is.getEncoding()));
260
        }
261
        
262
        extractTags(xmlinfo, is.getCharacterStream(), status);
263
        
264
        buildTablesFromTags(xmlinfo, status);
265

    
266
        removeDuplicateTableNames(xmlinfo, status);
267
        
268
        createPrimaryKeyAndForeignKeys(xmlinfo, status);
269
        
270
        createFeatureTypes(xmlinfo, status);
271

    
272
        return xmlinfo;
273
    }
274
    
275
    private String plural(String s) {
276
        int l = s.length();
277
        char ch = s.substring(l-1,l).toLowerCase().charAt(0);
278
        if( ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u' ) {
279
            return s+"s";
280
        }
281
        return s+"es";
282
    }
283

    
284
    private void buildTablesFromTags(XMLInfoImpl xmlinfo, SimpleTaskStatus status) {
285
        status.message("Identifying tables 2/5...");
286
        status.setRangeOfValues(0, xmlinfo.getTagsPaths().size());
287
        
288
        for (String tagPath : xmlinfo.getTagsPaths()) {
289
            XMLAttributeInfoImpl tag1Info = xmlinfo.getTagInfo(tagPath);
290
            System.out.println(tag1Info.getPath());
291
            XMLTableInfoImpl tableInfo = new XMLTableInfoImpl(tagPath, tag1Info);
292
            for (String tagPath2 : xmlinfo.getTagsPaths()) {
293
                if( tagPath.equals(tagPath2) ) {
294
                    continue;
295
                }
296
                if( tagPath2.startsWith(tagPath+"/") ) {
297
                    String fieldName = tagPath2.substring(tagPath.length()+1);
298
                    if( !fieldName.contains("/") ) {
299
                        XMLAttributeInfoImpl info = xmlinfo.getTag(tagPath2);
300
                        tableInfo.add(info);
301
                    }
302
                }
303
            }
304
            if( !tableInfo.isEmpty() ) {
305
                xmlinfo.addTable(tableInfo);
306
            }
307
            status.incrementCurrentValue();
308
        }
309
    }
310
    
311
    private void removeDuplicateTableNames(XMLInfoImpl xmlinfo, SimpleTaskStatus status) {
312
        status.message("Fixing table names 3/5...");
313
        status.setRangeOfValues(0, xmlinfo.size());
314

    
315
        // Tratamos de corregir nombres duplicados en las tablas.
316
        for (XMLTableInfo tableInfo01 : xmlinfo) {
317
            XMLTableInfoImpl tableInfo1 = (XMLTableInfoImpl) tableInfo01;
318
            
319
            status.message("Fixing table names 3/5 ("+tableInfo1.getName()+")...");
320

    
321
            boolean renombrar = false;
322
            for (XMLTableInfo tableInfo02 : xmlinfo) {
323
                XMLTableInfoImpl tableInfo2 = (XMLTableInfoImpl) tableInfo02;
324
                if( tableInfo1 == tableInfo2 ) {
325
                    continue;
326
                }
327
                if( StringUtils.equalsIgnoreCase(tableInfo1.getName(), tableInfo2.getName()) ) {
328
                    // FIXME: solo usa los dos ultimos nombres, podrian coincidir igualmente!!
329
                    String[] path_ss = tableInfo2.getPath().split("/");
330
                    int l = path_ss.length;
331
                    String name = path_ss[l-1]+"_"+path_ss[l-2];
332
                    tableInfo2.rename(name);
333
                    renombrar = true;
334
//                    System.out.println("###: Tablas con el mismo nombre, renombrado a "+name);
335
//                    System.out.println("###: "+tableInfo1.getPath());
336
//                    System.out.println("###: "+tableInfo2.getPath());
337
                }
338
            }
339
            if( renombrar ) {
340
                // FIXME: solo usa los dos ultimos nombres, podrian coincidir igualmente!!
341
                String[] path_ss = tableInfo1.getPath().split("/");
342
                int l = path_ss.length;
343
                String name = path_ss[l-1]+"_"+path_ss[l-2];
344
                tableInfo1.rename(name);
345
            }
346
            status.incrementCurrentValue();
347
        }
348
    }
349
    
350
    private void createPrimaryKeyAndForeignKeys(XMLInfoImpl xmlinfo, SimpleTaskStatus status) {
351
        status.message("Identifying foreign keys 4/5...");
352
        status.setRangeOfValues(0, xmlinfo.size());
353

    
354
        for (XMLTableInfo tableInfo0 : xmlinfo) {
355
            XMLTableInfoImpl tableInfo = (XMLTableInfoImpl) tableInfo0;
356
            
357
            status.message("Identifying foreign keys 4/5 ("+tableInfo.getName()+")...");
358

    
359
            String[] fieldkeys = tableInfo.getPath().split("/");
360
            for (int i = 0; i < fieldkeys.length; i++) {
361
                String fieldkey = fieldkeys[i];
362
                if( i==fieldkeys.length-1 ) {
363
                    tableInfo.add(new XMLAttributeInfoImpl(xmlinfo.getLocale(), "$ID_"+tableInfo.getName(), DataTypes.INT).setPk(true));
364
                } else { 
365
                    // TODO es una relacion 1:1
366
                    XMLTableInfo tableInfo2 = xmlinfo.getTableByPath(StringUtils.join(ArrayUtils.subarray(fieldkeys, 0, i+1),"/"));
367
                    tableInfo.add(new XMLAttributeInfoImpl(xmlinfo.getLocale(), "$ID_"+tableInfo2.getName(), DataTypes.INT)
368
                            .setFk(true)
369
                            .setFkCodeName("$ID_"+fieldkey)
370
                            .setFkTableName(fieldkey)
371
                    );
372
                }
373
            }
374

    
375
            for (XMLAttributeInfo field0 : tableInfo) {
376
                XMLAttributeInfoImpl field = (XMLAttributeInfoImpl) field0;
377
                if( xmlinfo.existsTableByPath(tableInfo.getPath()+"/"+field.getName()) ) {
378
                    XMLTableInfoImpl fktableInfo = xmlinfo.getTableByPath(tableInfo.getPath()+"/"+field.getName());
379
                    int count = tableInfo.getMaxCountChild(field.getName());
380
                    if( count<1 ) {
381
                        // Do nothing
382
                    } else if( count>1 ) {
383
                        // TODO es una relacion 1:n
384
                        field.setAggregate(true);
385
                        field.setFkCodeName("$ID_"+tableInfo.getName());
386
                        field.setFkTableName(fktableInfo.getName());
387
                        field.setSize(45);
388
                        field.setType(DataTypes.LIST);
389
                    } else {
390
                        // TODO es una relacion 1:1
391
                        field.setFk(true);
392
                        field.setFkCodeName("$ID_"+fktableInfo.getName());
393
                        field.setFkTableName(fktableInfo.getName());
394
                        field.setSize(0);
395
                        field.setType(DataTypes.INT);
396
                    }
397
                }
398
            }            
399
            tableInfo.sort();
400
//            System.out.println(tableInfo);
401
            status.incrementCurrentValue();
402
        }
403
    }
404
    
405
    private void createFeatureTypes(XMLInfoImpl xmlinfo, SimpleTaskStatus status) {
406
        status.message("Creating table definitions 5/5...");
407
        status.setRangeOfValues(0, xmlinfo.size());
408

    
409
        DataManager dataManager = DALLocator.getDataManager();
410

    
411
        for (XMLTableInfo tableInfo0 : xmlinfo) {
412
            XMLTableInfoImpl tableInfo = (XMLTableInfoImpl) tableInfo0;
413

    
414
            status.message("Creating table definitions 5/5 "+(tableInfo.getName())+"...");
415
            
416
            EditableFeatureType ft = dataManager.createFeatureType();
417
            ft.setLabel(tableInfo.getName());
418
            ft.getTags().set("xml2db.tablename", tableInfo.getName());
419
            ft.getTags().set("vcsgis.storename", tableInfo.getName());
420
            ft.getTags().set("xml2db.path", tableInfo.getPath());
421
            for (XMLAttributeInfo attrinfo0 : tableInfo) {
422
                XMLAttributeInfoImpl attrinfo = (XMLAttributeInfoImpl) attrinfo0;
423
                if( ft.get(attrinfo.getName())!=null ) {
424
                    System.out.println("WARN: attribute '"+attrinfo.getName()+"' duplicated in '"+tableInfo.getName()+"'.");
425
                }
426
                if(  attrinfo.isAggregate() ) {
427
                    continue;
428
                }
429
                EditableFeatureAttributeDescriptor attrdesc;
430
                if( attrinfo.getName().startsWith("$ID") ) {
431
                    attrdesc = ft.add(attrinfo.getName(), attrinfo.getType());
432
                    attrdesc.setLabel("Id. "+attrinfo.getName().substring(4));
433
                    attrdesc.setSize(attrinfo.getSize());
434
                } else if( attrinfo.getName().endsWith("$v") ) {
435
                    attrdesc = ft.add(attrinfo.getName(), attrinfo.getType());
436
                    attrdesc.setLabel(StringUtils.left(attrinfo.getName(),attrinfo.getName().length()-2));
437
                    attrdesc.setSize(attrinfo.getSize());
438
                } else {
439
                    attrdesc = ft.add(attrinfo.getName(), attrinfo.getType());
440
                    attrdesc.setSize(attrinfo.getSize());
441
                    attrdesc.setPrecision(attrinfo.getPrecision());
442
                    attrdesc.setScale(attrinfo.getScale());
443
                    if( attrdesc.getType()==DataTypes.GEOMETRY ) {
444
                        attrdesc.setGeometryType(attrinfo.getGeometryType());
445
                        attrdesc.setSRS(xmlinfo.getSrid());
446
                    }
447
                }
448
                attrdesc.setLocale(xmlinfo.getLocale());
449
                attrdesc.setIsPrimaryKey(attrinfo.isPk());
450
                if( attrinfo.isFk() ) {
451
                    EditableForeingKey fk = attrdesc.getForeingKey();
452
                    fk.setForeingKey(true);
453
                    fk.setTableName(attrinfo.getFkTableName());
454
                    fk.setCodeName(attrinfo.getFkCodeName());
455
                    fk.setClosedList(false);
456
                    fk.setLabelFormula("\""+attrinfo.getFkCodeName()+"\"");
457
                    attrdesc.setRelationType(DynField_v2.RELATION_TYPE_COLLABORATION);
458
                    attrdesc.setIsIndexed(true);
459
                }
460
            }
461
            FeatureAttributeDescriptor pk = ft.getPrimaryKey()[0];
462
            for (XMLAttributeInfo attrinfo0 : tableInfo) {
463
                XMLAttributeInfoImpl attrinfo = (XMLAttributeInfoImpl) attrinfo0;
464
                if(  attrinfo.isAggregate() ) {
465
                    EditableFeatureAttributeDescriptor attrdesc = ft.add("$List_"+attrinfo.getName(), attrinfo.getType());
466
                    attrdesc.setLabel(plural(attrinfo.getName()));
467
                    attrdesc.getTags().set("dal.relatedfeatures.table", attrinfo.getFkTableName());
468
                    attrdesc.getTags().set("dal.relatedfeatures.unique.field.name", attrinfo.getFkCodeName());
469
                    attrdesc.getTags().set("dynform.label.empty", true);
470
                    attrdesc.getTags().set("dynform.resizeWeight", 100);
471
                    attrdesc.setFeatureAttributeEmulator(
472
                        "SELECT * FROM \""+attrinfo.getFkTableName() + "\" WHERE ( (\""+pk.getName()+"\") = (\""+attrinfo.getFkTableName() + "\".\""+pk.getName()+"\") )"                    
473
                    );
474
                    attrdesc.setRelationType(DynField_v2.RELATION_TYPE_AGGREGATE);
475
                }
476
            }
477
            tableInfo.setFeatureType(ft);
478
            status.incrementCurrentValue();
479
        }
480
    }
481
}