svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / CSVUtils.java @ 46093
History | View | Annotate | Download (18.2 KB)
1 | 45685 | jjdelcerro | /**
|
---|---|---|---|
2 | * gvSIG. Desktop Geographic Information System.
|
||
3 | *
|
||
4 | * Copyright (C) 2007-2013 gvSIG Association.
|
||
5 | *
|
||
6 | * This program is free software; you can redistribute it and/or
|
||
7 | * modify it under the terms of the GNU General Public License
|
||
8 | * as published by the Free Software Foundation; either version 3
|
||
9 | * of the License, or (at your option) any later version.
|
||
10 | *
|
||
11 | * This program is distributed in the hope that it will be useful,
|
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
14 | * GNU General Public License for more details.
|
||
15 | *
|
||
16 | * You should have received a copy of the GNU General Public License
|
||
17 | * along with this program; if not, write to the Free Software
|
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||
19 | * MA 02110-1301, USA.
|
||
20 | *
|
||
21 | * For any additional information, do not hesitate to contact us
|
||
22 | * at info AT gvsig.com, or visit our website www.gvsig.com.
|
||
23 | */
|
||
24 | |||
25 | package org.gvsig.fmap.dal.store.csv; |
||
26 | |||
27 | import java.io.File; |
||
28 | import java.io.FileInputStream; |
||
29 | import java.io.FileNotFoundException; |
||
30 | import java.io.FileReader; |
||
31 | import java.io.IOException; |
||
32 | 46054 | omartinez | import java.io.InputStream; |
33 | 45685 | jjdelcerro | import java.io.InputStreamReader; |
34 | 46054 | omartinez | import java.io.Reader; |
35 | 45685 | jjdelcerro | import java.nio.charset.Charset; |
36 | import java.util.Locale; |
||
37 | import java.util.Map; |
||
38 | import org.apache.commons.io.FilenameUtils; |
||
39 | import org.apache.commons.io.IOUtils; |
||
40 | 46054 | omartinez | import org.apache.commons.io.input.BOMInputStream; |
41 | 45775 | jjdelcerro | import org.apache.commons.lang3.ArrayUtils; |
42 | 45685 | jjdelcerro | import org.apache.commons.lang3.StringUtils; |
43 | import org.gvsig.fmap.dal.DataTypes; |
||
44 | import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor; |
||
45 | import org.gvsig.fmap.dal.feature.EditableFeatureType; |
||
46 | import static org.gvsig.fmap.dal.store.csv.CSVStoreProvider.NAME; |
||
47 | import org.gvsig.fmap.dal.store.csv.simplereaders.CSVReaderSuperCSV; |
||
48 | import org.gvsig.fmap.dal.store.csv.simplereaders.FixedLenReader; |
||
49 | import org.gvsig.fmap.dal.store.csv.simplereaders.JSonReader; |
||
50 | import org.gvsig.fmap.dal.store.csv.simplereaders.SimpleReader; |
||
51 | import org.gvsig.fmap.geom.Geometry; |
||
52 | import org.gvsig.fmap.geom.GeometryLocator; |
||
53 | import org.gvsig.fmap.geom.GeometryManager; |
||
54 | import org.gvsig.fmap.geom.type.GeometryType; |
||
55 | import org.gvsig.tools.dynobject.Tags; |
||
56 | 45929 | jjdelcerro | import org.gvsig.tools.task.SimpleTaskStatus; |
57 | 45685 | jjdelcerro | import org.slf4j.Logger; |
58 | import org.slf4j.LoggerFactory; |
||
59 | |||
60 | /**
|
||
61 | *
|
||
62 | * @author gvSIG Team
|
||
63 | */
|
||
64 | 45720 | jjdelcerro | @SuppressWarnings("UseSpecificCatch") |
65 | 45685 | jjdelcerro | public class CSVUtils { |
66 | |||
67 | private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtils.class); |
||
68 | |||
/**
 * Private constructor: every member of this class visible here is static,
 * so instances are never needed.
 */
private CSVUtils() {

}
||
72 | public static InputStreamReader openFile(File f, String charsetName) throws FileNotFoundException { |
||
73 | String fullFileName = f==null? "NULL":f.getAbsolutePath(); |
||
74 | Charset charset = Charset.defaultCharset(); |
||
75 | 46054 | omartinez | InputStream fis = new BOMInputStream(new FileInputStream(f)); |
76 | 45720 | jjdelcerro | if (StringUtils.isNotBlank(charsetName)) {
|
77 | 45685 | jjdelcerro | if (Charset.isSupported(charsetName)) { |
78 | try {
|
||
79 | charset = Charset.forName(charsetName);
|
||
80 | } catch (Throwable th) { |
||
81 | LOGGER.warn("Can't use charset '" + charsetName + "' for read csv '" + fullFileName + "'.", th); |
||
82 | } |
||
83 | } else {
|
||
84 | LOGGER.warn("charset '" + charsetName + "' not supported for read csv '" + fullFileName + "'."); |
||
85 | } |
||
86 | } |
||
87 | InputStreamReader isr = new InputStreamReader(fis, charset); |
||
88 | return isr;
|
||
89 | } |
||
90 | |||
91 | 45929 | jjdelcerro | public static boolean loadFeatureType(CSVStoreParameters parameters, EditableFeatureType featureType, boolean detectTypes, SimpleTaskStatus status) throws IOException { |
92 | 45685 | jjdelcerro | InputStreamReader in = null; |
93 | SimpleReader reader = null;
|
||
94 | try {
|
||
95 | String headers[]; |
||
96 | |||
97 | in = openFile( |
||
98 | parameters.getFile(), |
||
99 | CSVStoreParameters.getCharset(parameters) |
||
100 | ); |
||
101 | |||
102 | reader = getSimpleReader(parameters, in); |
||
103 | |||
104 | headers = CSVStoreParameters.getHeaders(parameters); |
||
105 | if (headers == null) { |
||
106 | if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
||
107 | headers = reader.getHeader(); |
||
108 | if (headers == null) { |
||
109 | if (CSVStoreParameters.getIgnoreErrors(parameters)) {
|
||
110 | headers = getFixedHeaders(reader.getColumnsCount()); |
||
111 | } else {
|
||
112 | String msg = "Can't retrieve header from csv file '" |
||
113 | + parameters.getFile() |
||
114 | .getAbsolutePath() |
||
115 | + "' and not specified in the parameters.";
|
||
116 | LOGGER.warn(msg); |
||
117 | throw new RuntimeException(msg); |
||
118 | } |
||
119 | } |
||
120 | } else {
|
||
121 | headers = getFixedHeaders(reader.getColumnsCount()); |
||
122 | } |
||
123 | } else {
|
||
124 | if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
||
125 | reader.getHeader(); // Skip and ignore the header of file
|
||
126 | } |
||
127 | } |
||
128 | |||
129 | AutomaticDetectionOfTypes.DetectedValue[] detectedTypes = null; |
||
130 | if( detectTypes ) {
|
||
131 | 45929 | jjdelcerro | detectedTypes = automaticDetectionOfTypes(parameters, headers, status); |
132 | 45685 | jjdelcerro | } |
133 | 45775 | jjdelcerro | if( StringUtils.isBlank(headers[headers.length-1]) && |
134 | (detectedTypes==null || detectedTypes[headers.length-1].isBlank()) ) { |
||
135 | headers = ArrayUtils.remove(headers, headers.length-1);
|
||
136 | } |
||
137 | 45685 | jjdelcerro | if (detectedTypes != null && detectedTypes.length > headers.length) { |
138 | // Se han detectado mas columnas que las que hay en la cabezera,
|
||
139 | // a?adimos mas columnas a la cabezera.
|
||
140 | String[] headers2 = new String[detectedTypes.length]; |
||
141 | for (int i = 0; i < headers2.length; i++) { |
||
142 | if (i < headers.length) {
|
||
143 | headers2[i] = headers[i]; |
||
144 | } else {
|
||
145 | headers2[i] = getFixedHeader(i); |
||
146 | } |
||
147 | } |
||
148 | headers = headers2; |
||
149 | } |
||
150 | for (int i = 0; i < headers.length; i++) { |
||
151 | 45775 | jjdelcerro | if (StringUtils.isBlank(headers[i])) {
|
152 | 45685 | jjdelcerro | headers[i] = getFixedHeader(i); |
153 | } |
||
154 | } |
||
155 | // Initialize the feature types
|
||
156 | return fillFeatureType(parameters, featureType, headers, detectedTypes);
|
||
157 | } finally {
|
||
158 | IOUtils.closeQuietly(in); |
||
159 | IOUtils.closeQuietly(reader); |
||
160 | } |
||
161 | } |
||
162 | 46054 | omartinez | public static SimpleReader getSimpleReader(CSVStoreParameters parameters, Reader in) throws IOException { |
163 | 45685 | jjdelcerro | SimpleReader reader; |
164 | String filename = CSVStoreParameters.getFileName(parameters);
|
||
165 | if (FilenameUtils.isExtension(filename, "json")){ |
||
166 | reader= new JSonReader(in,parameters);
|
||
167 | } else if (CSVStoreParameters.getRawFieldsDefinition(parameters) != null) { |
||
168 | reader = new FixedLenReader(in, parameters);
|
||
169 | } else {
|
||
170 | reader = new CSVReaderSuperCSV(in, parameters);
|
||
171 | } |
||
172 | return reader;
|
||
173 | } |
||
174 | |||
175 | private static String getFixedHeader(int column) { |
||
176 | char[] header = new char[3]; |
||
177 | |||
178 | String s = String.format("%03d", column); |
||
179 | header[0] = (char) (s.charAt(0) + 17); |
||
180 | header[1] = (char) (s.charAt(1) + 17); |
||
181 | header[2] = (char) (s.charAt(2) + 17); |
||
182 | return String.valueOf(header); |
||
183 | } |
||
184 | |||
185 | private static String[] getFixedHeaders(int count) { |
||
186 | String[] headers = new String[count]; |
||
187 | for (int i = 0; i < headers.length; i++) { |
||
188 | headers[i] = getFixedHeader(i); |
||
189 | } |
||
190 | return headers;
|
||
191 | } |
||
192 | |||
193 | 45929 | jjdelcerro | private static AutomaticDetectionOfTypes.DetectedValue[] automaticDetectionOfTypes(CSVStoreParameters parameters, String[] headers, SimpleTaskStatus status) throws IOException { |
194 | 45685 | jjdelcerro | String fullFileName = parameters.getFile()==null? "NULL":parameters.getFile().getAbsolutePath(); |
195 | boolean automatic_types_detection = CSVStoreParameters.getAutomaticTypesDetection(parameters);
|
||
196 | if (!automatic_types_detection) {
|
||
197 | return null; |
||
198 | } |
||
199 | AutomaticDetectionOfTypes.DetectedValue[] types = null; |
||
200 | |||
201 | 46054 | omartinez | Reader in = null; |
202 | 45685 | jjdelcerro | SimpleReader reader = null;
|
203 | |||
204 | try {
|
||
205 | 46054 | omartinez | in = openFile( |
206 | parameters.getFile(), |
||
207 | CSVStoreParameters.getCharset(parameters) |
||
208 | ); |
||
209 | 45685 | jjdelcerro | reader = getSimpleReader(parameters, in); |
210 | AutomaticDetectionOfTypes x = new AutomaticDetectionOfTypes(
|
||
211 | fullFileName |
||
212 | ); |
||
213 | types = x.detect( |
||
214 | headers.length, |
||
215 | reader, |
||
216 | CSVStoreParameters.isFirstLineHeader(parameters), |
||
217 | 45929 | jjdelcerro | CSVStoreParameters.getLocale(parameters), |
218 | status |
||
219 | 45685 | jjdelcerro | ); |
220 | } catch (Exception ex) { |
||
221 | int lineno = 0; |
||
222 | if (reader != null) { |
||
223 | lineno = reader.getLine(); |
||
224 | } |
||
225 | throw new RuntimeException("Problems reading file '" + fullFileName + "' near line " + lineno + ".", ex); |
||
226 | |||
227 | } finally {
|
||
228 | IOUtils.closeQuietly(reader); |
||
229 | IOUtils.closeQuietly(in); |
||
230 | } |
||
231 | return types;
|
||
232 | } |
||
233 | |||
234 | private static boolean fillFeatureType(CSVStoreParameters parameters, EditableFeatureType fType, String headers[], AutomaticDetectionOfTypes.DetectedValue automaticTypes[]) { |
||
235 | String fullFileName = parameters.getFile()==null? "":parameters.getFile().getAbsolutePath(); |
||
236 | String providerName = NAME;
|
||
237 | |||
238 | fType.setHasOID(true);
|
||
239 | |||
240 | |||
241 | FieldTypeParser[] fieldTypes = new FieldTypeParser[headers.length]; |
||
242 | //
|
||
243 | // Calculamos cuales pueden ser los tipos de datos
|
||
244 | //
|
||
245 | for (int i = 0; i < fieldTypes.length; i++) { |
||
246 | fieldTypes[i] = new FieldTypeParser(providerName, fullFileName);
|
||
247 | } |
||
248 | |||
249 | // Asuminos los tipos pasados por parametro, que se supone
|
||
250 | // son los detectados automaticamente.
|
||
251 | if (automaticTypes != null) { |
||
252 | for (int i = 0; i < fieldTypes.length && i < automaticTypes.length; i++) { |
||
253 | fieldTypes[i].detectedValue = automaticTypes[i]; |
||
254 | fieldTypes[i].type = automaticTypes[i].getType(); |
||
255 | } |
||
256 | } |
||
257 | // Luego probamos con lo que diga las cabezeras del CVS, sobreescribiendo
|
||
258 | // los tipos anteriores en caso de definirse en la cabezara.
|
||
259 | boolean all_fields_declare_type = true; |
||
260 | for (int i = 0; i < fieldTypes.length; i++) { |
||
261 | if (!fieldTypes[i].parse(headers[i])) {
|
||
262 | LOGGER.warn("Can't parse header of field "+i+ "( "+headers[i]+") in '"+providerName+"' file '" + fullFileName + "'."); |
||
263 | } |
||
264 | if( fieldTypes[i].type == DataTypes.UNKNOWN ) {
|
||
265 | all_fields_declare_type = false;
|
||
266 | fieldTypes[i].type = DataTypes.STRING; |
||
267 | } |
||
268 | } |
||
269 | |||
270 | // Y por ultimo hacemos caso a lo que se haya especificado en los parametros
|
||
271 | // de apertura del CSV, teniendo esto prioridad sobre todo.
|
||
272 | String param_types_def = CSVStoreParameters.getRawFieldTypes(parameters);
|
||
273 | if (StringUtils.isNotBlank(param_types_def)) {
|
||
274 | String sep = CSVStoreParameters.getDelimiter(param_types_def);
|
||
275 | if (StringUtils.isNotBlank(sep)) {
|
||
276 | String[] param_types = param_types_def.split(sep); |
||
277 | FieldTypeParser parser = new FieldTypeParser(providerName, fullFileName);
|
||
278 | for (String param_type : param_types) { |
||
279 | parser.clear(); |
||
280 | parser.parse(param_type); |
||
281 | for (FieldTypeParser fieldType : fieldTypes) {
|
||
282 | if (StringUtils.equalsIgnoreCase(fieldType.name, parser.name)) {
|
||
283 | fieldType.copyFrom(parser); |
||
284 | break;
|
||
285 | } |
||
286 | } |
||
287 | } |
||
288 | } |
||
289 | } |
||
290 | //
|
||
291 | // Una vez ya sabemos los tipos de datos rellenamos el feature-type
|
||
292 | //
|
||
293 | 45724 | jjdelcerro | Tags ftypeTags = fType.getTags(); |
294 | 45685 | jjdelcerro | for (FieldTypeParser fieldType : fieldTypes) {
|
295 | EditableFeatureAttributeDescriptor fad = fType.add(fieldType.name, fieldType.type); |
||
296 | if( fieldType.detectedValue!=null ) { |
||
297 | fad.setDisplaySize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
||
298 | fad.setSize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
||
299 | if( fad.getPrecision()<fieldType.detectedValue.getPrecision() ) {
|
||
300 | fad.setPrecision(fieldType.detectedValue.getPrecision()); |
||
301 | } |
||
302 | if( fad.getScale()<fieldType.detectedValue.getScale()) {
|
||
303 | fad.setScale(fieldType.detectedValue.getScale()); |
||
304 | } |
||
305 | } else {
|
||
306 | fad.setDisplaySize(fieldType.size); |
||
307 | } |
||
308 | if (fieldType.type == DataTypes.GEOMETRY ) {
|
||
309 | fad.setGeometryType(fieldType.geomType, fieldType.geomSubtype); |
||
310 | if( fType.getDefaultGeometryAttributeName() == null ) { |
||
311 | fType.setDefaultGeometryAttributeName(fieldType.name); |
||
312 | } |
||
313 | 46093 | fdiaz | } |
314 | Locale locale = null; |
||
315 | if (fieldType.type == DataTypes.TIMESTAMP ) {
|
||
316 | if(!CSVStoreParameters.isBlankOrDefaultLocale(parameters)){
|
||
317 | locale = CSVStoreParameters.getLocale(parameters); |
||
318 | } |
||
319 | } else {
|
||
320 | locale = CSVStoreParameters.getLocale(parameters); |
||
321 | 45685 | jjdelcerro | } |
322 | fad.setLocale(locale); |
||
323 | for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
||
324 | try {
|
||
325 | switch(entry.getKey().toLowerCase()) {
|
||
326 | case "expression": |
||
327 | // Los campos calculados los procesamos en una segunda
|
||
328 | // pasada, cuando ya estan definidos el resto de los campos
|
||
329 | // ya que pueden requerir campos que aun no se han definido.
|
||
330 | break;
|
||
331 | default:
|
||
332 | fad.set(entry.getKey(), entry.getValue()); |
||
333 | } |
||
334 | } catch (Exception ex) { |
||
335 | LOGGER.warn("Can't set property '"+entry.getKey()+"' of '"+fad.getName()+"'.", ex); |
||
336 | } |
||
337 | } |
||
338 | Tags tags = fad.getTags(); |
||
339 | for (Map.Entry<String, String> entry : fieldType.tags.entrySet()) { |
||
340 | tags.set(entry.getKey(), entry.getValue()); |
||
341 | } |
||
342 | 45724 | jjdelcerro | for (Map.Entry<String, String> entry : fieldType.typetags.entrySet()) { |
343 | ftypeTags.set(entry.getKey(), entry.getValue()); |
||
344 | } |
||
345 | 45784 | jjdelcerro | for (Map.Entry<String, String> entry : fieldType.typeAssignments.entrySet()) { |
346 | try {
|
||
347 | fType.set(entry.getKey(), entry.getValue()); |
||
348 | } catch(Exception ex) { |
||
349 | LOGGER.warn("Can't set attribute '"+entry.getKey()+"' in the feature type.", ex); |
||
350 | } |
||
351 | } |
||
352 | 45685 | jjdelcerro | } |
353 | // Processamos ahora los campos calculados
|
||
354 | for (FieldTypeParser fieldType : fieldTypes) {
|
||
355 | EditableFeatureAttributeDescriptor fad = fType.getEditableAttributeDescriptor(fieldType.name); |
||
356 | for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
||
357 | try {
|
||
358 | switch(entry.getKey().toLowerCase()) {
|
||
359 | case "expression": |
||
360 | fad.set(entry.getKey(), entry.getValue()); |
||
361 | break;
|
||
362 | } |
||
363 | } catch (Exception ex) { |
||
364 | LOGGER.warn("Can't set property '"+entry.getKey()+"' in '"+fad.getName()+"' of '"+fullFileName+"'.", ex); |
||
365 | } |
||
366 | } |
||
367 | } |
||
368 | String[] pointDimensionNames = CSVStoreParameters.getPointDimensionNames(parameters); |
||
369 | if ( pointDimensionNames != null ) { |
||
370 | CSVPointAttributeEmulator emulator = new CSVPointAttributeEmulator(pointDimensionNames);
|
||
371 | String columnName = CSVStoreParameters.getPointColumnName(parameters);
|
||
372 | if( StringUtils.isBlank(columnName) ) {
|
||
373 | columnName = "geom";
|
||
374 | } |
||
375 | EditableFeatureAttributeDescriptor attr = fType.add(columnName, DataTypes.GEOMETRY, emulator); |
||
376 | GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
||
377 | GeometryType gt; |
||
378 | try {
|
||
379 | if ( emulator.getFieldNames() != null && emulator.getFieldNames().length <= 2 ) { |
||
380 | gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM2D); |
||
381 | } else {
|
||
382 | gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM3D); |
||
383 | } |
||
384 | attr.setGeometryType(gt); |
||
385 | } catch (Exception e) { |
||
386 | LOGGER.warn("Can't set geometry type for the calculated field in '"+providerName+"' file '" + fullFileName + "'.", e); |
||
387 | } |
||
388 | } |
||
389 | |||
390 | String geometry_column = CSVStoreParameters.getGeometryColumn(parameters);
|
||
391 | if (!StringUtils.isEmpty(geometry_column)) {
|
||
392 | EditableFeatureAttributeDescriptor attr = (EditableFeatureAttributeDescriptor) fType.get(geometry_column); |
||
393 | if (attr != null ) { |
||
394 | if( attr.getType() != DataTypes.GEOMETRY ) {
|
||
395 | attr.setDataType(DataTypes.GEOMETRY); |
||
396 | } |
||
397 | GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
||
398 | GeometryType gt; |
||
399 | try {
|
||
400 | gt = geommgr.getGeometryType( |
||
401 | CSVStoreParameters.getGeometryType(parameters), |
||
402 | CSVStoreParameters.getGeometrySubType(parameters) |
||
403 | ); |
||
404 | attr.setGeometryType(gt); |
||
405 | } catch (Exception e) { |
||
406 | LOGGER.warn("Can't set geometry type for the calculated field in CSV file '" + fullFileName + "'.", e); |
||
407 | } |
||
408 | fType.setDefaultGeometryAttributeName(geometry_column); |
||
409 | } |
||
410 | } |
||
411 | return all_fields_declare_type;
|
||
412 | } |
||
413 | |||
414 | } |