svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / CSVUtils.java @ 45904
History | View | Annotate | Download (17.5 KB)
1 | 45685 | jjdelcerro | /**
|
---|---|---|---|
2 | * gvSIG. Desktop Geographic Information System.
|
||
3 | *
|
||
4 | * Copyright (C) 2007-2013 gvSIG Association.
|
||
5 | *
|
||
6 | * This program is free software; you can redistribute it and/or
|
||
7 | * modify it under the terms of the GNU General Public License
|
||
8 | * as published by the Free Software Foundation; either version 3
|
||
9 | * of the License, or (at your option) any later version.
|
||
10 | *
|
||
11 | * This program is distributed in the hope that it will be useful,
|
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
14 | * GNU General Public License for more details.
|
||
15 | *
|
||
16 | * You should have received a copy of the GNU General Public License
|
||
17 | * along with this program; if not, write to the Free Software
|
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||
19 | * MA 02110-1301, USA.
|
||
20 | *
|
||
21 | * For any additional information, do not hesitate to contact us
|
||
22 | * at info AT gvsig.com, or visit our website www.gvsig.com.
|
||
23 | */
|
||
24 | |||
25 | package org.gvsig.fmap.dal.store.csv; |
||
26 | |||
27 | import java.io.File; |
||
28 | import java.io.FileInputStream; |
||
29 | import java.io.FileNotFoundException; |
||
30 | import java.io.FileReader; |
||
31 | import java.io.IOException; |
||
32 | import java.io.InputStreamReader; |
||
33 | import java.nio.charset.Charset; |
||
34 | import java.util.Locale; |
||
35 | import java.util.Map; |
||
36 | import org.apache.commons.io.FilenameUtils; |
||
37 | import org.apache.commons.io.IOUtils; |
||
38 | 45775 | jjdelcerro | import org.apache.commons.lang3.ArrayUtils; |
39 | 45685 | jjdelcerro | import org.apache.commons.lang3.StringUtils; |
40 | import org.gvsig.fmap.dal.DataTypes; |
||
41 | import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor; |
||
42 | import org.gvsig.fmap.dal.feature.EditableFeatureType; |
||
43 | import static org.gvsig.fmap.dal.store.csv.CSVStoreProvider.NAME; |
||
44 | import org.gvsig.fmap.dal.store.csv.simplereaders.CSVReaderSuperCSV; |
||
45 | import org.gvsig.fmap.dal.store.csv.simplereaders.FixedLenReader; |
||
46 | import org.gvsig.fmap.dal.store.csv.simplereaders.JSonReader; |
||
47 | import org.gvsig.fmap.dal.store.csv.simplereaders.SimpleReader; |
||
48 | import org.gvsig.fmap.geom.Geometry; |
||
49 | import org.gvsig.fmap.geom.GeometryLocator; |
||
50 | import org.gvsig.fmap.geom.GeometryManager; |
||
51 | import org.gvsig.fmap.geom.type.GeometryType; |
||
52 | import org.gvsig.tools.dynobject.Tags; |
||
53 | import org.slf4j.Logger; |
||
54 | import org.slf4j.LoggerFactory; |
||
55 | |||
56 | /**
|
||
57 | *
|
||
58 | * @author gvSIG Team
|
||
59 | */
|
||
60 | 45720 | jjdelcerro | @SuppressWarnings("UseSpecificCatch") |
61 | 45685 | jjdelcerro | public class CSVUtils { |
62 | |||
/** Logger used by the static helpers of this class. */
private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtils.class);

/**
 * Not meant to be instantiated: every member of this class is static.
 */
private CSVUtils() {
    // Intentionally empty.
}
||
68 | public static InputStreamReader openFile(File f, String charsetName) throws FileNotFoundException { |
||
69 | String fullFileName = f==null? "NULL":f.getAbsolutePath(); |
||
70 | Charset charset = Charset.defaultCharset(); |
||
71 | FileInputStream fis = new FileInputStream(f); |
||
72 | 45720 | jjdelcerro | if (StringUtils.isNotBlank(charsetName)) {
|
73 | 45685 | jjdelcerro | if (Charset.isSupported(charsetName)) { |
74 | try {
|
||
75 | charset = Charset.forName(charsetName);
|
||
76 | } catch (Throwable th) { |
||
77 | LOGGER.warn("Can't use charset '" + charsetName + "' for read csv '" + fullFileName + "'.", th); |
||
78 | } |
||
79 | } else {
|
||
80 | LOGGER.warn("charset '" + charsetName + "' not supported for read csv '" + fullFileName + "'."); |
||
81 | } |
||
82 | } |
||
83 | InputStreamReader isr = new InputStreamReader(fis, charset); |
||
84 | return isr;
|
||
85 | } |
||
86 | |||
87 | public static boolean loadFeatureType(CSVStoreParameters parameters, EditableFeatureType featureType, boolean detectTypes) throws IOException { |
||
88 | InputStreamReader in = null; |
||
89 | SimpleReader reader = null;
|
||
90 | try {
|
||
91 | String headers[]; |
||
92 | |||
93 | in = openFile( |
||
94 | parameters.getFile(), |
||
95 | CSVStoreParameters.getCharset(parameters) |
||
96 | ); |
||
97 | |||
98 | reader = getSimpleReader(parameters, in); |
||
99 | |||
100 | headers = CSVStoreParameters.getHeaders(parameters); |
||
101 | if (headers == null) { |
||
102 | if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
||
103 | headers = reader.getHeader(); |
||
104 | if (headers == null) { |
||
105 | if (CSVStoreParameters.getIgnoreErrors(parameters)) {
|
||
106 | headers = getFixedHeaders(reader.getColumnsCount()); |
||
107 | } else {
|
||
108 | String msg = "Can't retrieve header from csv file '" |
||
109 | + parameters.getFile() |
||
110 | .getAbsolutePath() |
||
111 | + "' and not specified in the parameters.";
|
||
112 | LOGGER.warn(msg); |
||
113 | throw new RuntimeException(msg); |
||
114 | } |
||
115 | } |
||
116 | } else {
|
||
117 | headers = getFixedHeaders(reader.getColumnsCount()); |
||
118 | } |
||
119 | } else {
|
||
120 | if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
||
121 | reader.getHeader(); // Skip and ignore the header of file
|
||
122 | } |
||
123 | } |
||
124 | |||
125 | AutomaticDetectionOfTypes.DetectedValue[] detectedTypes = null; |
||
126 | if( detectTypes ) {
|
||
127 | detectedTypes = automaticDetectionOfTypes(parameters, headers); |
||
128 | } |
||
129 | 45775 | jjdelcerro | if( StringUtils.isBlank(headers[headers.length-1]) && |
130 | (detectedTypes==null || detectedTypes[headers.length-1].isBlank()) ) { |
||
131 | headers = ArrayUtils.remove(headers, headers.length-1);
|
||
132 | } |
||
133 | 45685 | jjdelcerro | if (detectedTypes != null && detectedTypes.length > headers.length) { |
134 | // Se han detectado mas columnas que las que hay en la cabezera,
|
||
135 | // a?adimos mas columnas a la cabezera.
|
||
136 | String[] headers2 = new String[detectedTypes.length]; |
||
137 | for (int i = 0; i < headers2.length; i++) { |
||
138 | if (i < headers.length) {
|
||
139 | headers2[i] = headers[i]; |
||
140 | } else {
|
||
141 | headers2[i] = getFixedHeader(i); |
||
142 | } |
||
143 | } |
||
144 | headers = headers2; |
||
145 | } |
||
146 | for (int i = 0; i < headers.length; i++) { |
||
147 | 45775 | jjdelcerro | if (StringUtils.isBlank(headers[i])) {
|
148 | 45685 | jjdelcerro | headers[i] = getFixedHeader(i); |
149 | } |
||
150 | } |
||
151 | // Initialize the feature types
|
||
152 | return fillFeatureType(parameters, featureType, headers, detectedTypes);
|
||
153 | } finally {
|
||
154 | IOUtils.closeQuietly(in); |
||
155 | IOUtils.closeQuietly(reader); |
||
156 | } |
||
157 | } |
||
158 | public static SimpleReader getSimpleReader(CSVStoreParameters parameters, InputStreamReader in) throws IOException { |
||
159 | SimpleReader reader; |
||
160 | String filename = CSVStoreParameters.getFileName(parameters);
|
||
161 | if (FilenameUtils.isExtension(filename, "json")){ |
||
162 | reader= new JSonReader(in,parameters);
|
||
163 | } else if (CSVStoreParameters.getRawFieldsDefinition(parameters) != null) { |
||
164 | reader = new FixedLenReader(in, parameters);
|
||
165 | } else {
|
||
166 | reader = new CSVReaderSuperCSV(in, parameters);
|
||
167 | } |
||
168 | return reader;
|
||
169 | } |
||
170 | |||
171 | private static String getFixedHeader(int column) { |
||
172 | char[] header = new char[3]; |
||
173 | |||
174 | String s = String.format("%03d", column); |
||
175 | header[0] = (char) (s.charAt(0) + 17); |
||
176 | header[1] = (char) (s.charAt(1) + 17); |
||
177 | header[2] = (char) (s.charAt(2) + 17); |
||
178 | return String.valueOf(header); |
||
179 | } |
||
180 | |||
181 | private static String[] getFixedHeaders(int count) { |
||
182 | String[] headers = new String[count]; |
||
183 | for (int i = 0; i < headers.length; i++) { |
||
184 | headers[i] = getFixedHeader(i); |
||
185 | } |
||
186 | return headers;
|
||
187 | } |
||
188 | |||
189 | private static AutomaticDetectionOfTypes.DetectedValue[] automaticDetectionOfTypes(CSVStoreParameters parameters, String[] headers) throws IOException { |
||
190 | String fullFileName = parameters.getFile()==null? "NULL":parameters.getFile().getAbsolutePath(); |
||
191 | boolean automatic_types_detection = CSVStoreParameters.getAutomaticTypesDetection(parameters);
|
||
192 | if (!automatic_types_detection) {
|
||
193 | return null; |
||
194 | } |
||
195 | AutomaticDetectionOfTypes.DetectedValue[] types = null; |
||
196 | |||
197 | FileReader in = null; |
||
198 | SimpleReader reader = null;
|
||
199 | |||
200 | try {
|
||
201 | in = new FileReader(parameters.getFile()); |
||
202 | reader = getSimpleReader(parameters, in); |
||
203 | AutomaticDetectionOfTypes x = new AutomaticDetectionOfTypes(
|
||
204 | fullFileName |
||
205 | ); |
||
206 | types = x.detect( |
||
207 | headers.length, |
||
208 | reader, |
||
209 | CSVStoreParameters.isFirstLineHeader(parameters), |
||
210 | CSVStoreParameters.getLocale(parameters) |
||
211 | ); |
||
212 | } catch (Exception ex) { |
||
213 | int lineno = 0; |
||
214 | if (reader != null) { |
||
215 | lineno = reader.getLine(); |
||
216 | } |
||
217 | throw new RuntimeException("Problems reading file '" + fullFileName + "' near line " + lineno + ".", ex); |
||
218 | |||
219 | } finally {
|
||
220 | IOUtils.closeQuietly(reader); |
||
221 | IOUtils.closeQuietly(in); |
||
222 | } |
||
223 | return types;
|
||
224 | } |
||
225 | |||
226 | private static boolean fillFeatureType(CSVStoreParameters parameters, EditableFeatureType fType, String headers[], AutomaticDetectionOfTypes.DetectedValue automaticTypes[]) { |
||
227 | String fullFileName = parameters.getFile()==null? "":parameters.getFile().getAbsolutePath(); |
||
228 | String providerName = NAME;
|
||
229 | |||
230 | fType.setHasOID(true);
|
||
231 | |||
232 | |||
233 | FieldTypeParser[] fieldTypes = new FieldTypeParser[headers.length]; |
||
234 | //
|
||
235 | // Calculamos cuales pueden ser los tipos de datos
|
||
236 | //
|
||
237 | for (int i = 0; i < fieldTypes.length; i++) { |
||
238 | fieldTypes[i] = new FieldTypeParser(providerName, fullFileName);
|
||
239 | } |
||
240 | |||
241 | // Asuminos los tipos pasados por parametro, que se supone
|
||
242 | // son los detectados automaticamente.
|
||
243 | if (automaticTypes != null) { |
||
244 | for (int i = 0; i < fieldTypes.length && i < automaticTypes.length; i++) { |
||
245 | fieldTypes[i].detectedValue = automaticTypes[i]; |
||
246 | fieldTypes[i].type = automaticTypes[i].getType(); |
||
247 | } |
||
248 | } |
||
249 | // Luego probamos con lo que diga las cabezeras del CVS, sobreescribiendo
|
||
250 | // los tipos anteriores en caso de definirse en la cabezara.
|
||
251 | boolean all_fields_declare_type = true; |
||
252 | for (int i = 0; i < fieldTypes.length; i++) { |
||
253 | if (!fieldTypes[i].parse(headers[i])) {
|
||
254 | LOGGER.warn("Can't parse header of field "+i+ "( "+headers[i]+") in '"+providerName+"' file '" + fullFileName + "'."); |
||
255 | } |
||
256 | if( fieldTypes[i].type == DataTypes.UNKNOWN ) {
|
||
257 | all_fields_declare_type = false;
|
||
258 | fieldTypes[i].type = DataTypes.STRING; |
||
259 | } |
||
260 | } |
||
261 | |||
262 | // Y por ultimo hacemos caso a lo que se haya especificado en los parametros
|
||
263 | // de apertura del CSV, teniendo esto prioridad sobre todo.
|
||
264 | String param_types_def = CSVStoreParameters.getRawFieldTypes(parameters);
|
||
265 | if (StringUtils.isNotBlank(param_types_def)) {
|
||
266 | String sep = CSVStoreParameters.getDelimiter(param_types_def);
|
||
267 | if (StringUtils.isNotBlank(sep)) {
|
||
268 | String[] param_types = param_types_def.split(sep); |
||
269 | FieldTypeParser parser = new FieldTypeParser(providerName, fullFileName);
|
||
270 | for (String param_type : param_types) { |
||
271 | parser.clear(); |
||
272 | parser.parse(param_type); |
||
273 | for (FieldTypeParser fieldType : fieldTypes) {
|
||
274 | if (StringUtils.equalsIgnoreCase(fieldType.name, parser.name)) {
|
||
275 | fieldType.copyFrom(parser); |
||
276 | break;
|
||
277 | } |
||
278 | } |
||
279 | } |
||
280 | } |
||
281 | } |
||
282 | //
|
||
283 | // Una vez ya sabemos los tipos de datos rellenamos el feature-type
|
||
284 | //
|
||
285 | 45724 | jjdelcerro | Tags ftypeTags = fType.getTags(); |
286 | 45685 | jjdelcerro | for (FieldTypeParser fieldType : fieldTypes) {
|
287 | EditableFeatureAttributeDescriptor fad = fType.add(fieldType.name, fieldType.type); |
||
288 | if( fieldType.detectedValue!=null ) { |
||
289 | fad.setDisplaySize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
||
290 | fad.setSize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
||
291 | if( fad.getPrecision()<fieldType.detectedValue.getPrecision() ) {
|
||
292 | fad.setPrecision(fieldType.detectedValue.getPrecision()); |
||
293 | } |
||
294 | if( fad.getScale()<fieldType.detectedValue.getScale()) {
|
||
295 | fad.setScale(fieldType.detectedValue.getScale()); |
||
296 | } |
||
297 | } else {
|
||
298 | fad.setDisplaySize(fieldType.size); |
||
299 | } |
||
300 | if (fieldType.type == DataTypes.GEOMETRY ) {
|
||
301 | fad.setGeometryType(fieldType.geomType, fieldType.geomSubtype); |
||
302 | if( fType.getDefaultGeometryAttributeName() == null ) { |
||
303 | fType.setDefaultGeometryAttributeName(fieldType.name); |
||
304 | } |
||
305 | } |
||
306 | Locale locale = CSVStoreParameters.getLocale(parameters);
|
||
307 | fad.setLocale(locale); |
||
308 | for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
||
309 | try {
|
||
310 | switch(entry.getKey().toLowerCase()) {
|
||
311 | case "expression": |
||
312 | // Los campos calculados los procesamos en una segunda
|
||
313 | // pasada, cuando ya estan definidos el resto de los campos
|
||
314 | // ya que pueden requerir campos que aun no se han definido.
|
||
315 | break;
|
||
316 | default:
|
||
317 | fad.set(entry.getKey(), entry.getValue()); |
||
318 | } |
||
319 | } catch (Exception ex) { |
||
320 | LOGGER.warn("Can't set property '"+entry.getKey()+"' of '"+fad.getName()+"'.", ex); |
||
321 | } |
||
322 | } |
||
323 | Tags tags = fad.getTags(); |
||
324 | for (Map.Entry<String, String> entry : fieldType.tags.entrySet()) { |
||
325 | tags.set(entry.getKey(), entry.getValue()); |
||
326 | } |
||
327 | 45724 | jjdelcerro | for (Map.Entry<String, String> entry : fieldType.typetags.entrySet()) { |
328 | ftypeTags.set(entry.getKey(), entry.getValue()); |
||
329 | } |
||
330 | 45784 | jjdelcerro | for (Map.Entry<String, String> entry : fieldType.typeAssignments.entrySet()) { |
331 | try {
|
||
332 | fType.set(entry.getKey(), entry.getValue()); |
||
333 | } catch(Exception ex) { |
||
334 | LOGGER.warn("Can't set attribute '"+entry.getKey()+"' in the feature type.", ex); |
||
335 | } |
||
336 | } |
||
337 | 45685 | jjdelcerro | } |
338 | // Processamos ahora los campos calculados
|
||
339 | for (FieldTypeParser fieldType : fieldTypes) {
|
||
340 | EditableFeatureAttributeDescriptor fad = fType.getEditableAttributeDescriptor(fieldType.name); |
||
341 | for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
||
342 | try {
|
||
343 | switch(entry.getKey().toLowerCase()) {
|
||
344 | case "expression": |
||
345 | fad.set(entry.getKey(), entry.getValue()); |
||
346 | break;
|
||
347 | } |
||
348 | } catch (Exception ex) { |
||
349 | LOGGER.warn("Can't set property '"+entry.getKey()+"' in '"+fad.getName()+"' of '"+fullFileName+"'.", ex); |
||
350 | } |
||
351 | } |
||
352 | } |
||
353 | String[] pointDimensionNames = CSVStoreParameters.getPointDimensionNames(parameters); |
||
354 | if ( pointDimensionNames != null ) { |
||
355 | CSVPointAttributeEmulator emulator = new CSVPointAttributeEmulator(pointDimensionNames);
|
||
356 | String columnName = CSVStoreParameters.getPointColumnName(parameters);
|
||
357 | if( StringUtils.isBlank(columnName) ) {
|
||
358 | columnName = "geom";
|
||
359 | } |
||
360 | EditableFeatureAttributeDescriptor attr = fType.add(columnName, DataTypes.GEOMETRY, emulator); |
||
361 | GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
||
362 | GeometryType gt; |
||
363 | try {
|
||
364 | if ( emulator.getFieldNames() != null && emulator.getFieldNames().length <= 2 ) { |
||
365 | gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM2D); |
||
366 | } else {
|
||
367 | gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM3D); |
||
368 | } |
||
369 | attr.setGeometryType(gt); |
||
370 | } catch (Exception e) { |
||
371 | LOGGER.warn("Can't set geometry type for the calculated field in '"+providerName+"' file '" + fullFileName + "'.", e); |
||
372 | } |
||
373 | } |
||
374 | |||
375 | String geometry_column = CSVStoreParameters.getGeometryColumn(parameters);
|
||
376 | if (!StringUtils.isEmpty(geometry_column)) {
|
||
377 | EditableFeatureAttributeDescriptor attr = (EditableFeatureAttributeDescriptor) fType.get(geometry_column); |
||
378 | if (attr != null ) { |
||
379 | if( attr.getType() != DataTypes.GEOMETRY ) {
|
||
380 | attr.setDataType(DataTypes.GEOMETRY); |
||
381 | } |
||
382 | GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
||
383 | GeometryType gt; |
||
384 | try {
|
||
385 | gt = geommgr.getGeometryType( |
||
386 | CSVStoreParameters.getGeometryType(parameters), |
||
387 | CSVStoreParameters.getGeometrySubType(parameters) |
||
388 | ); |
||
389 | attr.setGeometryType(gt); |
||
390 | } catch (Exception e) { |
||
391 | LOGGER.warn("Can't set geometry type for the calculated field in CSV file '" + fullFileName + "'.", e); |
||
392 | } |
||
393 | fType.setDefaultGeometryAttributeName(geometry_column); |
||
394 | } |
||
395 | } |
||
396 | return all_fields_declare_type;
|
||
397 | } |
||
398 | |||
399 | } |