Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / CSVUtils.java @ 46093

History | View | Annotate | Download (18.2 KB)

1
/**
2
 * gvSIG. Desktop Geographic Information System.
3
 *
4
 * Copyright (C) 2007-2013 gvSIG Association.
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 3
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19
 * MA 02110-1301, USA.
20
 *
21
 * For any additional information, do not hesitate to contact us
22
 * at info AT gvsig.com, or visit our website www.gvsig.com.
23
 */
24

    
25
package org.gvsig.fmap.dal.store.csv;
26

    
27
import java.io.File;
28
import java.io.FileInputStream;
29
import java.io.FileNotFoundException;
30
import java.io.FileReader;
31
import java.io.IOException;
32
import java.io.InputStream;
33
import java.io.InputStreamReader;
34
import java.io.Reader;
35
import java.nio.charset.Charset;
36
import java.util.Locale;
37
import java.util.Map;
38
import org.apache.commons.io.FilenameUtils;
39
import org.apache.commons.io.IOUtils;
40
import org.apache.commons.io.input.BOMInputStream;
41
import org.apache.commons.lang3.ArrayUtils;
42
import org.apache.commons.lang3.StringUtils;
43
import org.gvsig.fmap.dal.DataTypes;
44
import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor;
45
import org.gvsig.fmap.dal.feature.EditableFeatureType;
46
import static org.gvsig.fmap.dal.store.csv.CSVStoreProvider.NAME;
47
import org.gvsig.fmap.dal.store.csv.simplereaders.CSVReaderSuperCSV;
48
import org.gvsig.fmap.dal.store.csv.simplereaders.FixedLenReader;
49
import org.gvsig.fmap.dal.store.csv.simplereaders.JSonReader;
50
import org.gvsig.fmap.dal.store.csv.simplereaders.SimpleReader;
51
import org.gvsig.fmap.geom.Geometry;
52
import org.gvsig.fmap.geom.GeometryLocator;
53
import org.gvsig.fmap.geom.GeometryManager;
54
import org.gvsig.fmap.geom.type.GeometryType;
55
import org.gvsig.tools.dynobject.Tags;
56
import org.gvsig.tools.task.SimpleTaskStatus;
57
import org.slf4j.Logger;
58
import org.slf4j.LoggerFactory;
59

    
60
/**
 * Static helpers to open CSV files and to build the feature type of a CSV store.
 *
 * @author gvSIG Team
 */
64
@SuppressWarnings("UseSpecificCatch")
65
public class CSVUtils {
66
    
67
    private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtils.class);
68

    
69
    /**
     * Hidden constructor: every member of this class is static, so no
     * instance is ever needed.
     */
    private CSVUtils() {
        // Intentionally empty.
    }
72
    public static InputStreamReader openFile(File f, String charsetName) throws FileNotFoundException {
73
        String fullFileName = f==null? "NULL":f.getAbsolutePath();
74
        Charset charset = Charset.defaultCharset();
75
        InputStream fis = new BOMInputStream(new FileInputStream(f));
76
        if (StringUtils.isNotBlank(charsetName)) {
77
            if (Charset.isSupported(charsetName)) {
78
                try {
79
                    charset = Charset.forName(charsetName);
80
                } catch (Throwable th) {
81
                    LOGGER.warn("Can't use charset '" + charsetName + "' for read csv '" + fullFileName + "'.", th);
82
                }
83
            } else {
84
                LOGGER.warn("charset '" + charsetName + "' not supported for read csv '" + fullFileName + "'.");
85
            }
86
        }
87
        InputStreamReader isr = new InputStreamReader(fis, charset);
88
        return isr;
89
    }
90
    
91
    public static boolean loadFeatureType(CSVStoreParameters parameters, EditableFeatureType featureType, boolean  detectTypes, SimpleTaskStatus status) throws IOException {
92
        InputStreamReader in = null;
93
        SimpleReader reader = null;
94
        try {
95
            String headers[];
96

    
97
            in = openFile(
98
                    parameters.getFile(),
99
                    CSVStoreParameters.getCharset(parameters)
100
            );
101

    
102
            reader = getSimpleReader(parameters, in);
103

    
104
            headers = CSVStoreParameters.getHeaders(parameters);
105
            if (headers == null) {
106
                if (CSVStoreParameters.isFirstLineHeader(parameters)) {
107
                    headers = reader.getHeader();
108
                    if (headers == null) {
109
                        if (CSVStoreParameters.getIgnoreErrors(parameters)) {
110
                            headers = getFixedHeaders(reader.getColumnsCount());
111
                        } else {
112
                            String msg = "Can't retrieve header from csv file '"
113
                                    + parameters.getFile()
114
                                            .getAbsolutePath()
115
                                    + "' and not specified in the parameters.";
116
                            LOGGER.warn(msg);
117
                            throw new RuntimeException(msg);
118
                        }
119
                    }
120
                } else {
121
                    headers = getFixedHeaders(reader.getColumnsCount());
122
                }
123
            } else {
124
                if (CSVStoreParameters.isFirstLineHeader(parameters)) {
125
                    reader.getHeader(); // Skip and ignore the header of file
126
                }
127
            }
128

    
129
            AutomaticDetectionOfTypes.DetectedValue[] detectedTypes = null;
130
            if( detectTypes ) {
131
                detectedTypes = automaticDetectionOfTypes(parameters, headers, status);
132
            }
133
            if( StringUtils.isBlank(headers[headers.length-1]) &&
134
                (detectedTypes==null || detectedTypes[headers.length-1].isBlank()) ) {
135
                headers = ArrayUtils.remove(headers, headers.length-1);
136
            }
137
            if (detectedTypes != null && detectedTypes.length > headers.length) {
138
                // Se han detectado mas columnas que las que hay en la cabezera,
139
                // a?adimos mas columnas a la cabezera.
140
                String[] headers2 = new String[detectedTypes.length];
141
                for (int i = 0; i < headers2.length; i++) {
142
                    if (i < headers.length) {
143
                        headers2[i] = headers[i];
144
                    } else {
145
                        headers2[i] = getFixedHeader(i);
146
                    }
147
                }
148
                headers = headers2;
149
            }
150
            for (int i = 0; i < headers.length; i++) {
151
                if (StringUtils.isBlank(headers[i])) {
152
                    headers[i] = getFixedHeader(i);
153
                }
154
            }
155
            // Initialize the feature types
156
            return fillFeatureType(parameters, featureType,  headers, detectedTypes);
157
        } finally {
158
            IOUtils.closeQuietly(in);
159
            IOUtils.closeQuietly(reader);
160
        }
161
    }
162
    public static SimpleReader getSimpleReader(CSVStoreParameters parameters, Reader in) throws IOException {
163
        SimpleReader reader;
164
        String filename = CSVStoreParameters.getFileName(parameters);
165
        if (FilenameUtils.isExtension(filename, "json")){
166
            reader= new JSonReader(in,parameters);
167
        } else if (CSVStoreParameters.getRawFieldsDefinition(parameters) != null) {
168
            reader = new FixedLenReader(in, parameters);
169
        } else {
170
            reader = new CSVReaderSuperCSV(in, parameters);
171
        }
172
        return reader;
173
    }
174

    
175
    private static String getFixedHeader(int column) {
176
        char[] header = new char[3];
177

    
178
        String s = String.format("%03d", column);
179
        header[0] = (char) (s.charAt(0) + 17);
180
        header[1] = (char) (s.charAt(1) + 17);
181
        header[2] = (char) (s.charAt(2) + 17);
182
        return String.valueOf(header);
183
    }
184

    
185
    private static String[] getFixedHeaders(int count) {
186
        String[] headers = new String[count];
187
        for (int i = 0; i < headers.length; i++) {
188
            headers[i] = getFixedHeader(i);
189
        }
190
        return headers;
191
    }
192

    
193
    private static AutomaticDetectionOfTypes.DetectedValue[] automaticDetectionOfTypes(CSVStoreParameters parameters, String[] headers, SimpleTaskStatus status) throws IOException {
194
        String fullFileName = parameters.getFile()==null? "NULL":parameters.getFile().getAbsolutePath();
195
        boolean automatic_types_detection = CSVStoreParameters.getAutomaticTypesDetection(parameters);
196
        if (!automatic_types_detection) {
197
            return null;
198
        }
199
        AutomaticDetectionOfTypes.DetectedValue[] types = null;
200

    
201
        Reader in = null;
202
        SimpleReader reader = null;
203

    
204
        try {
205
            in = openFile(
206
                    parameters.getFile(),
207
                    CSVStoreParameters.getCharset(parameters)
208
            );
209
            reader = getSimpleReader(parameters, in);
210
            AutomaticDetectionOfTypes x = new AutomaticDetectionOfTypes(
211
                    fullFileName
212
            );
213
            types = x.detect(
214
                    headers.length,
215
                    reader,
216
                    CSVStoreParameters.isFirstLineHeader(parameters),
217
                    CSVStoreParameters.getLocale(parameters),
218
                    status
219
            );
220
        } catch (Exception ex) {
221
            int lineno = 0;
222
            if (reader != null) {
223
                lineno = reader.getLine();
224
            }
225
            throw new RuntimeException("Problems reading file '" + fullFileName + "' near line " + lineno + ".", ex);
226

    
227
        } finally {
228
            IOUtils.closeQuietly(reader);
229
            IOUtils.closeQuietly(in);
230
        }
231
        return types;
232
    }
233
    
234
    private static boolean fillFeatureType(CSVStoreParameters parameters, EditableFeatureType fType, String headers[], AutomaticDetectionOfTypes.DetectedValue automaticTypes[]) {
235
        String fullFileName = parameters.getFile()==null? "":parameters.getFile().getAbsolutePath();
236
        String providerName = NAME;
237
        
238
        fType.setHasOID(true);
239

    
240

    
241
        FieldTypeParser[] fieldTypes = new FieldTypeParser[headers.length];
242
        //
243
        // Calculamos cuales pueden ser los tipos de datos
244
        //
245
        for (int i = 0; i < fieldTypes.length; i++) {
246
            fieldTypes[i] = new FieldTypeParser(providerName, fullFileName);
247
        }
248

    
249
        // Asuminos los tipos pasados por parametro, que se supone
250
        // son los detectados automaticamente.
251
        if (automaticTypes != null) {
252
            for (int i = 0; i < fieldTypes.length && i < automaticTypes.length; i++) {
253
                fieldTypes[i].detectedValue = automaticTypes[i];
254
                fieldTypes[i].type = automaticTypes[i].getType();
255
            }
256
        }
257
        // Luego probamos con lo que diga las cabezeras del CVS, sobreescribiendo
258
        // los tipos anteriores en caso de definirse en la cabezara.
259
        boolean all_fields_declare_type = true;
260
        for (int i = 0; i < fieldTypes.length; i++) {
261
            if (!fieldTypes[i].parse(headers[i])) {
262
                LOGGER.warn("Can't parse header of field "+i+ "( "+headers[i]+") in '"+providerName+"' file '" + fullFileName + "'.");
263
            }
264
            if( fieldTypes[i].type == DataTypes.UNKNOWN ) {
265
                all_fields_declare_type = false;
266
                fieldTypes[i].type = DataTypes.STRING;
267
            }
268
        }
269

    
270
        // Y por ultimo hacemos caso a lo que se haya especificado en los parametros
271
        // de apertura del CSV, teniendo esto prioridad sobre todo.
272
        String param_types_def = CSVStoreParameters.getRawFieldTypes(parameters);
273
        if (StringUtils.isNotBlank(param_types_def)) {
274
            String sep = CSVStoreParameters.getDelimiter(param_types_def);
275
            if (StringUtils.isNotBlank(sep)) {
276
                String[] param_types = param_types_def.split(sep);
277
                FieldTypeParser parser = new FieldTypeParser(providerName, fullFileName);
278
                for (String param_type : param_types) {
279
                    parser.clear();
280
                    parser.parse(param_type);
281
                    for (FieldTypeParser fieldType : fieldTypes) {
282
                        if (StringUtils.equalsIgnoreCase(fieldType.name, parser.name)) {
283
                            fieldType.copyFrom(parser);
284
                            break;
285
                        }
286
                    }
287
                }
288
            }
289
        }
290
        //
291
        // Una vez ya sabemos los tipos de datos rellenamos el feature-type
292
        //
293
        Tags ftypeTags = fType.getTags();
294
        for (FieldTypeParser fieldType : fieldTypes) {
295
            EditableFeatureAttributeDescriptor fad = fType.add(fieldType.name, fieldType.type);
296
            if( fieldType.detectedValue!=null ) {
297
                fad.setDisplaySize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
298
                fad.setSize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
299
                if( fad.getPrecision()<fieldType.detectedValue.getPrecision() ) {
300
                    fad.setPrecision(fieldType.detectedValue.getPrecision());
301
                }
302
                if( fad.getScale()<fieldType.detectedValue.getScale()) {
303
                    fad.setScale(fieldType.detectedValue.getScale());
304
                }
305
            } else {
306
                fad.setDisplaySize(fieldType.size);
307
            }
308
            if (fieldType.type == DataTypes.GEOMETRY ) {
309
                fad.setGeometryType(fieldType.geomType, fieldType.geomSubtype);
310
                if( fType.getDefaultGeometryAttributeName() == null ) {
311
                    fType.setDefaultGeometryAttributeName(fieldType.name);
312
                }
313
            } 
314
            Locale locale = null;
315
            if (fieldType.type == DataTypes.TIMESTAMP ) {
316
                if(!CSVStoreParameters.isBlankOrDefaultLocale(parameters)){
317
                    locale = CSVStoreParameters.getLocale(parameters);
318
                }
319
            } else {
320
                locale = CSVStoreParameters.getLocale(parameters);
321
            }
322
            fad.setLocale(locale);
323
            for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) {
324
                try {
325
                    switch(entry.getKey().toLowerCase()) {
326
                        case "expression":
327
                            // Los campos calculados los procesamos en una segunda
328
                            // pasada, cuando ya estan definidos el resto de los campos
329
                            // ya que pueden requerir campos que aun no se han definido.
330
                            break;
331
                        default:
332
                                fad.set(entry.getKey(), entry.getValue());
333
                            }
334
                } catch (Exception ex) {
335
                    LOGGER.warn("Can't set property '"+entry.getKey()+"' of '"+fad.getName()+"'.", ex);
336
                }
337
            }            
338
            Tags tags = fad.getTags();
339
            for (Map.Entry<String, String> entry : fieldType.tags.entrySet()) {
340
                tags.set(entry.getKey(), entry.getValue());
341
            }
342
            for (Map.Entry<String, String> entry : fieldType.typetags.entrySet()) {
343
                ftypeTags.set(entry.getKey(), entry.getValue());
344
            }
345
            for (Map.Entry<String, String> entry : fieldType.typeAssignments.entrySet()) {
346
                try {
347
                    fType.set(entry.getKey(), entry.getValue());
348
                } catch(Exception ex) {
349
                    LOGGER.warn("Can't set attribute '"+entry.getKey()+"' in the feature type.", ex);
350
                }
351
            }
352
        }
353
        // Processamos ahora los campos calculados
354
        for (FieldTypeParser fieldType : fieldTypes) {
355
            EditableFeatureAttributeDescriptor fad = fType.getEditableAttributeDescriptor(fieldType.name);
356
            for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) {
357
                try {
358
                    switch(entry.getKey().toLowerCase()) {
359
                        case "expression":
360
                            fad.set(entry.getKey(), entry.getValue());
361
                            break;
362
                    }
363
                } catch (Exception ex) {
364
                    LOGGER.warn("Can't set property '"+entry.getKey()+"' in '"+fad.getName()+"' of '"+fullFileName+"'.", ex);
365
                }
366
            }
367
        }
368
        String[] pointDimensionNames = CSVStoreParameters.getPointDimensionNames(parameters);
369
        if ( pointDimensionNames != null ) {
370
            CSVPointAttributeEmulator emulator = new CSVPointAttributeEmulator(pointDimensionNames);
371
            String columnName = CSVStoreParameters.getPointColumnName(parameters);
372
            if( StringUtils.isBlank(columnName) ) {
373
                columnName = "geom";
374
            }
375
            EditableFeatureAttributeDescriptor attr = fType.add(columnName, DataTypes.GEOMETRY, emulator);
376
            GeometryManager geommgr = GeometryLocator.getGeometryManager();
377
            GeometryType gt;
378
            try {
379
                if ( emulator.getFieldNames() != null && emulator.getFieldNames().length <= 2 ) {
380
                        gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM2D);
381
                } else {
382
                        gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM3D);
383
                }
384
                attr.setGeometryType(gt);
385
            } catch (Exception e) {
386
                LOGGER.warn("Can't set geometry type for the calculated field in '"+providerName+"' file '" + fullFileName + "'.", e);
387
            }
388
        }        
389
        
390
        String geometry_column = CSVStoreParameters.getGeometryColumn(parameters);
391
        if (!StringUtils.isEmpty(geometry_column)) {
392
            EditableFeatureAttributeDescriptor attr = (EditableFeatureAttributeDescriptor) fType.get(geometry_column);
393
            if (attr != null ) {
394
                if( attr.getType() != DataTypes.GEOMETRY ) {
395
                    attr.setDataType(DataTypes.GEOMETRY);
396
                }
397
                GeometryManager geommgr = GeometryLocator.getGeometryManager();
398
                GeometryType gt;
399
                try {
400
                    gt = geommgr.getGeometryType(
401
                            CSVStoreParameters.getGeometryType(parameters),
402
                            CSVStoreParameters.getGeometrySubType(parameters)
403
                    );
404
                    attr.setGeometryType(gt);
405
                } catch (Exception e) {
406
                    LOGGER.warn("Can't set geometry type for the calculated field in CSV file '" + fullFileName + "'.", e);
407
                }
408
                fType.setDefaultGeometryAttributeName(geometry_column);
409
            }
410
        }
411
        return all_fields_declare_type;
412
    }
413
    
414
}