svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / CSVUtils.java @ 46054
History | View | Annotate | Download (17.9 KB)
1 |
/**
|
---|---|
2 |
* gvSIG. Desktop Geographic Information System.
|
3 |
*
|
4 |
* Copyright (C) 2007-2013 gvSIG Association.
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 3
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
* For any additional information, do not hesitate to contact us
|
22 |
* at info AT gvsig.com, or visit our website www.gvsig.com.
|
23 |
*/
|
24 |
|
25 |
package org.gvsig.fmap.dal.store.csv; |
26 |
|
27 |
import java.io.File; |
28 |
import java.io.FileInputStream; |
29 |
import java.io.FileNotFoundException; |
30 |
import java.io.FileReader; |
31 |
import java.io.IOException; |
32 |
import java.io.InputStream; |
33 |
import java.io.InputStreamReader; |
34 |
import java.io.Reader; |
35 |
import java.nio.charset.Charset; |
36 |
import java.util.Locale; |
37 |
import java.util.Map; |
38 |
import org.apache.commons.io.FilenameUtils; |
39 |
import org.apache.commons.io.IOUtils; |
40 |
import org.apache.commons.io.input.BOMInputStream; |
41 |
import org.apache.commons.lang3.ArrayUtils; |
42 |
import org.apache.commons.lang3.StringUtils; |
43 |
import org.gvsig.fmap.dal.DataTypes; |
44 |
import org.gvsig.fmap.dal.feature.EditableFeatureAttributeDescriptor; |
45 |
import org.gvsig.fmap.dal.feature.EditableFeatureType; |
46 |
import static org.gvsig.fmap.dal.store.csv.CSVStoreProvider.NAME; |
47 |
import org.gvsig.fmap.dal.store.csv.simplereaders.CSVReaderSuperCSV; |
48 |
import org.gvsig.fmap.dal.store.csv.simplereaders.FixedLenReader; |
49 |
import org.gvsig.fmap.dal.store.csv.simplereaders.JSonReader; |
50 |
import org.gvsig.fmap.dal.store.csv.simplereaders.SimpleReader; |
51 |
import org.gvsig.fmap.geom.Geometry; |
52 |
import org.gvsig.fmap.geom.GeometryLocator; |
53 |
import org.gvsig.fmap.geom.GeometryManager; |
54 |
import org.gvsig.fmap.geom.type.GeometryType; |
55 |
import org.gvsig.tools.dynobject.Tags; |
56 |
import org.gvsig.tools.task.SimpleTaskStatus; |
57 |
import org.slf4j.Logger; |
58 |
import org.slf4j.LoggerFactory; |
59 |
|
60 |
/**
|
61 |
*
|
62 |
* @author gvSIG Team
|
63 |
*/
|
64 |
@SuppressWarnings("UseSpecificCatch") |
65 |
public class CSVUtils { |
66 |
|
67 |
private static final Logger LOGGER = LoggerFactory.getLogger(CSVUtils.class); |
68 |
|
69 |
/**
 * Not instantiable: this class only exposes static helpers.
 */
private CSVUtils() {
    // Intentionally empty.
}
72 |
public static InputStreamReader openFile(File f, String charsetName) throws FileNotFoundException { |
73 |
String fullFileName = f==null? "NULL":f.getAbsolutePath(); |
74 |
Charset charset = Charset.defaultCharset(); |
75 |
InputStream fis = new BOMInputStream(new FileInputStream(f)); |
76 |
if (StringUtils.isNotBlank(charsetName)) {
|
77 |
if (Charset.isSupported(charsetName)) { |
78 |
try {
|
79 |
charset = Charset.forName(charsetName);
|
80 |
} catch (Throwable th) { |
81 |
LOGGER.warn("Can't use charset '" + charsetName + "' for read csv '" + fullFileName + "'.", th); |
82 |
} |
83 |
} else {
|
84 |
LOGGER.warn("charset '" + charsetName + "' not supported for read csv '" + fullFileName + "'."); |
85 |
} |
86 |
} |
87 |
InputStreamReader isr = new InputStreamReader(fis, charset); |
88 |
return isr;
|
89 |
} |
90 |
|
91 |
public static boolean loadFeatureType(CSVStoreParameters parameters, EditableFeatureType featureType, boolean detectTypes, SimpleTaskStatus status) throws IOException { |
92 |
InputStreamReader in = null; |
93 |
SimpleReader reader = null;
|
94 |
try {
|
95 |
String headers[]; |
96 |
|
97 |
in = openFile( |
98 |
parameters.getFile(), |
99 |
CSVStoreParameters.getCharset(parameters) |
100 |
); |
101 |
|
102 |
reader = getSimpleReader(parameters, in); |
103 |
|
104 |
headers = CSVStoreParameters.getHeaders(parameters); |
105 |
if (headers == null) { |
106 |
if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
107 |
headers = reader.getHeader(); |
108 |
if (headers == null) { |
109 |
if (CSVStoreParameters.getIgnoreErrors(parameters)) {
|
110 |
headers = getFixedHeaders(reader.getColumnsCount()); |
111 |
} else {
|
112 |
String msg = "Can't retrieve header from csv file '" |
113 |
+ parameters.getFile() |
114 |
.getAbsolutePath() |
115 |
+ "' and not specified in the parameters.";
|
116 |
LOGGER.warn(msg); |
117 |
throw new RuntimeException(msg); |
118 |
} |
119 |
} |
120 |
} else {
|
121 |
headers = getFixedHeaders(reader.getColumnsCount()); |
122 |
} |
123 |
} else {
|
124 |
if (CSVStoreParameters.isFirstLineHeader(parameters)) {
|
125 |
reader.getHeader(); // Skip and ignore the header of file
|
126 |
} |
127 |
} |
128 |
|
129 |
AutomaticDetectionOfTypes.DetectedValue[] detectedTypes = null; |
130 |
if( detectTypes ) {
|
131 |
detectedTypes = automaticDetectionOfTypes(parameters, headers, status); |
132 |
} |
133 |
if( StringUtils.isBlank(headers[headers.length-1]) && |
134 |
(detectedTypes==null || detectedTypes[headers.length-1].isBlank()) ) { |
135 |
headers = ArrayUtils.remove(headers, headers.length-1);
|
136 |
} |
137 |
if (detectedTypes != null && detectedTypes.length > headers.length) { |
138 |
// Se han detectado mas columnas que las que hay en la cabezera,
|
139 |
// a?adimos mas columnas a la cabezera.
|
140 |
String[] headers2 = new String[detectedTypes.length]; |
141 |
for (int i = 0; i < headers2.length; i++) { |
142 |
if (i < headers.length) {
|
143 |
headers2[i] = headers[i]; |
144 |
} else {
|
145 |
headers2[i] = getFixedHeader(i); |
146 |
} |
147 |
} |
148 |
headers = headers2; |
149 |
} |
150 |
for (int i = 0; i < headers.length; i++) { |
151 |
if (StringUtils.isBlank(headers[i])) {
|
152 |
headers[i] = getFixedHeader(i); |
153 |
} |
154 |
} |
155 |
// Initialize the feature types
|
156 |
return fillFeatureType(parameters, featureType, headers, detectedTypes);
|
157 |
} finally {
|
158 |
IOUtils.closeQuietly(in); |
159 |
IOUtils.closeQuietly(reader); |
160 |
} |
161 |
} |
162 |
public static SimpleReader getSimpleReader(CSVStoreParameters parameters, Reader in) throws IOException { |
163 |
SimpleReader reader; |
164 |
String filename = CSVStoreParameters.getFileName(parameters);
|
165 |
if (FilenameUtils.isExtension(filename, "json")){ |
166 |
reader= new JSonReader(in,parameters);
|
167 |
} else if (CSVStoreParameters.getRawFieldsDefinition(parameters) != null) { |
168 |
reader = new FixedLenReader(in, parameters);
|
169 |
} else {
|
170 |
reader = new CSVReaderSuperCSV(in, parameters);
|
171 |
} |
172 |
return reader;
|
173 |
} |
174 |
|
175 |
private static String getFixedHeader(int column) { |
176 |
char[] header = new char[3]; |
177 |
|
178 |
String s = String.format("%03d", column); |
179 |
header[0] = (char) (s.charAt(0) + 17); |
180 |
header[1] = (char) (s.charAt(1) + 17); |
181 |
header[2] = (char) (s.charAt(2) + 17); |
182 |
return String.valueOf(header); |
183 |
} |
184 |
|
185 |
private static String[] getFixedHeaders(int count) { |
186 |
String[] headers = new String[count]; |
187 |
for (int i = 0; i < headers.length; i++) { |
188 |
headers[i] = getFixedHeader(i); |
189 |
} |
190 |
return headers;
|
191 |
} |
192 |
|
193 |
private static AutomaticDetectionOfTypes.DetectedValue[] automaticDetectionOfTypes(CSVStoreParameters parameters, String[] headers, SimpleTaskStatus status) throws IOException { |
194 |
String fullFileName = parameters.getFile()==null? "NULL":parameters.getFile().getAbsolutePath(); |
195 |
boolean automatic_types_detection = CSVStoreParameters.getAutomaticTypesDetection(parameters);
|
196 |
if (!automatic_types_detection) {
|
197 |
return null; |
198 |
} |
199 |
AutomaticDetectionOfTypes.DetectedValue[] types = null; |
200 |
|
201 |
Reader in = null; |
202 |
SimpleReader reader = null;
|
203 |
|
204 |
try {
|
205 |
in = openFile( |
206 |
parameters.getFile(), |
207 |
CSVStoreParameters.getCharset(parameters) |
208 |
); |
209 |
reader = getSimpleReader(parameters, in); |
210 |
AutomaticDetectionOfTypes x = new AutomaticDetectionOfTypes(
|
211 |
fullFileName |
212 |
); |
213 |
types = x.detect( |
214 |
headers.length, |
215 |
reader, |
216 |
CSVStoreParameters.isFirstLineHeader(parameters), |
217 |
CSVStoreParameters.getLocale(parameters), |
218 |
status |
219 |
); |
220 |
} catch (Exception ex) { |
221 |
int lineno = 0; |
222 |
if (reader != null) { |
223 |
lineno = reader.getLine(); |
224 |
} |
225 |
throw new RuntimeException("Problems reading file '" + fullFileName + "' near line " + lineno + ".", ex); |
226 |
|
227 |
} finally {
|
228 |
IOUtils.closeQuietly(reader); |
229 |
IOUtils.closeQuietly(in); |
230 |
} |
231 |
return types;
|
232 |
} |
233 |
|
234 |
private static boolean fillFeatureType(CSVStoreParameters parameters, EditableFeatureType fType, String headers[], AutomaticDetectionOfTypes.DetectedValue automaticTypes[]) { |
235 |
String fullFileName = parameters.getFile()==null? "":parameters.getFile().getAbsolutePath(); |
236 |
String providerName = NAME;
|
237 |
|
238 |
fType.setHasOID(true);
|
239 |
|
240 |
|
241 |
FieldTypeParser[] fieldTypes = new FieldTypeParser[headers.length]; |
242 |
//
|
243 |
// Calculamos cuales pueden ser los tipos de datos
|
244 |
//
|
245 |
for (int i = 0; i < fieldTypes.length; i++) { |
246 |
fieldTypes[i] = new FieldTypeParser(providerName, fullFileName);
|
247 |
} |
248 |
|
249 |
// Asuminos los tipos pasados por parametro, que se supone
|
250 |
// son los detectados automaticamente.
|
251 |
if (automaticTypes != null) { |
252 |
for (int i = 0; i < fieldTypes.length && i < automaticTypes.length; i++) { |
253 |
fieldTypes[i].detectedValue = automaticTypes[i]; |
254 |
fieldTypes[i].type = automaticTypes[i].getType(); |
255 |
} |
256 |
} |
257 |
// Luego probamos con lo que diga las cabezeras del CVS, sobreescribiendo
|
258 |
// los tipos anteriores en caso de definirse en la cabezara.
|
259 |
boolean all_fields_declare_type = true; |
260 |
for (int i = 0; i < fieldTypes.length; i++) { |
261 |
if (!fieldTypes[i].parse(headers[i])) {
|
262 |
LOGGER.warn("Can't parse header of field "+i+ "( "+headers[i]+") in '"+providerName+"' file '" + fullFileName + "'."); |
263 |
} |
264 |
if( fieldTypes[i].type == DataTypes.UNKNOWN ) {
|
265 |
all_fields_declare_type = false;
|
266 |
fieldTypes[i].type = DataTypes.STRING; |
267 |
} |
268 |
} |
269 |
|
270 |
// Y por ultimo hacemos caso a lo que se haya especificado en los parametros
|
271 |
// de apertura del CSV, teniendo esto prioridad sobre todo.
|
272 |
String param_types_def = CSVStoreParameters.getRawFieldTypes(parameters);
|
273 |
if (StringUtils.isNotBlank(param_types_def)) {
|
274 |
String sep = CSVStoreParameters.getDelimiter(param_types_def);
|
275 |
if (StringUtils.isNotBlank(sep)) {
|
276 |
String[] param_types = param_types_def.split(sep); |
277 |
FieldTypeParser parser = new FieldTypeParser(providerName, fullFileName);
|
278 |
for (String param_type : param_types) { |
279 |
parser.clear(); |
280 |
parser.parse(param_type); |
281 |
for (FieldTypeParser fieldType : fieldTypes) {
|
282 |
if (StringUtils.equalsIgnoreCase(fieldType.name, parser.name)) {
|
283 |
fieldType.copyFrom(parser); |
284 |
break;
|
285 |
} |
286 |
} |
287 |
} |
288 |
} |
289 |
} |
290 |
//
|
291 |
// Una vez ya sabemos los tipos de datos rellenamos el feature-type
|
292 |
//
|
293 |
Tags ftypeTags = fType.getTags(); |
294 |
for (FieldTypeParser fieldType : fieldTypes) {
|
295 |
EditableFeatureAttributeDescriptor fad = fType.add(fieldType.name, fieldType.type); |
296 |
if( fieldType.detectedValue!=null ) { |
297 |
fad.setDisplaySize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
298 |
fad.setSize(Math.max(fieldType.detectedValue.getDisplaySize(), fieldType.size));
|
299 |
if( fad.getPrecision()<fieldType.detectedValue.getPrecision() ) {
|
300 |
fad.setPrecision(fieldType.detectedValue.getPrecision()); |
301 |
} |
302 |
if( fad.getScale()<fieldType.detectedValue.getScale()) {
|
303 |
fad.setScale(fieldType.detectedValue.getScale()); |
304 |
} |
305 |
} else {
|
306 |
fad.setDisplaySize(fieldType.size); |
307 |
} |
308 |
if (fieldType.type == DataTypes.GEOMETRY ) {
|
309 |
fad.setGeometryType(fieldType.geomType, fieldType.geomSubtype); |
310 |
if( fType.getDefaultGeometryAttributeName() == null ) { |
311 |
fType.setDefaultGeometryAttributeName(fieldType.name); |
312 |
} |
313 |
} |
314 |
Locale locale = CSVStoreParameters.getLocale(parameters);
|
315 |
fad.setLocale(locale); |
316 |
for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
317 |
try {
|
318 |
switch(entry.getKey().toLowerCase()) {
|
319 |
case "expression": |
320 |
// Los campos calculados los procesamos en una segunda
|
321 |
// pasada, cuando ya estan definidos el resto de los campos
|
322 |
// ya que pueden requerir campos que aun no se han definido.
|
323 |
break;
|
324 |
default:
|
325 |
fad.set(entry.getKey(), entry.getValue()); |
326 |
} |
327 |
} catch (Exception ex) { |
328 |
LOGGER.warn("Can't set property '"+entry.getKey()+"' of '"+fad.getName()+"'.", ex); |
329 |
} |
330 |
} |
331 |
Tags tags = fad.getTags(); |
332 |
for (Map.Entry<String, String> entry : fieldType.tags.entrySet()) { |
333 |
tags.set(entry.getKey(), entry.getValue()); |
334 |
} |
335 |
for (Map.Entry<String, String> entry : fieldType.typetags.entrySet()) { |
336 |
ftypeTags.set(entry.getKey(), entry.getValue()); |
337 |
} |
338 |
for (Map.Entry<String, String> entry : fieldType.typeAssignments.entrySet()) { |
339 |
try {
|
340 |
fType.set(entry.getKey(), entry.getValue()); |
341 |
} catch(Exception ex) { |
342 |
LOGGER.warn("Can't set attribute '"+entry.getKey()+"' in the feature type.", ex); |
343 |
} |
344 |
} |
345 |
} |
346 |
// Processamos ahora los campos calculados
|
347 |
for (FieldTypeParser fieldType : fieldTypes) {
|
348 |
EditableFeatureAttributeDescriptor fad = fType.getEditableAttributeDescriptor(fieldType.name); |
349 |
for (Map.Entry<String, String> entry : fieldType.assignments.entrySet()) { |
350 |
try {
|
351 |
switch(entry.getKey().toLowerCase()) {
|
352 |
case "expression": |
353 |
fad.set(entry.getKey(), entry.getValue()); |
354 |
break;
|
355 |
} |
356 |
} catch (Exception ex) { |
357 |
LOGGER.warn("Can't set property '"+entry.getKey()+"' in '"+fad.getName()+"' of '"+fullFileName+"'.", ex); |
358 |
} |
359 |
} |
360 |
} |
361 |
String[] pointDimensionNames = CSVStoreParameters.getPointDimensionNames(parameters); |
362 |
if ( pointDimensionNames != null ) { |
363 |
CSVPointAttributeEmulator emulator = new CSVPointAttributeEmulator(pointDimensionNames);
|
364 |
String columnName = CSVStoreParameters.getPointColumnName(parameters);
|
365 |
if( StringUtils.isBlank(columnName) ) {
|
366 |
columnName = "geom";
|
367 |
} |
368 |
EditableFeatureAttributeDescriptor attr = fType.add(columnName, DataTypes.GEOMETRY, emulator); |
369 |
GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
370 |
GeometryType gt; |
371 |
try {
|
372 |
if ( emulator.getFieldNames() != null && emulator.getFieldNames().length <= 2 ) { |
373 |
gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM2D); |
374 |
} else {
|
375 |
gt = geommgr.getGeometryType(Geometry.TYPES.GEOMETRY, Geometry.SUBTYPES.GEOM3D); |
376 |
} |
377 |
attr.setGeometryType(gt); |
378 |
} catch (Exception e) { |
379 |
LOGGER.warn("Can't set geometry type for the calculated field in '"+providerName+"' file '" + fullFileName + "'.", e); |
380 |
} |
381 |
} |
382 |
|
383 |
String geometry_column = CSVStoreParameters.getGeometryColumn(parameters);
|
384 |
if (!StringUtils.isEmpty(geometry_column)) {
|
385 |
EditableFeatureAttributeDescriptor attr = (EditableFeatureAttributeDescriptor) fType.get(geometry_column); |
386 |
if (attr != null ) { |
387 |
if( attr.getType() != DataTypes.GEOMETRY ) {
|
388 |
attr.setDataType(DataTypes.GEOMETRY); |
389 |
} |
390 |
GeometryManager geommgr = GeometryLocator.getGeometryManager(); |
391 |
GeometryType gt; |
392 |
try {
|
393 |
gt = geommgr.getGeometryType( |
394 |
CSVStoreParameters.getGeometryType(parameters), |
395 |
CSVStoreParameters.getGeometrySubType(parameters) |
396 |
); |
397 |
attr.setGeometryType(gt); |
398 |
} catch (Exception e) { |
399 |
LOGGER.warn("Can't set geometry type for the calculated field in CSV file '" + fullFileName + "'.", e); |
400 |
} |
401 |
fType.setDefaultGeometryAttributeName(geometry_column); |
402 |
} |
403 |
} |
404 |
return all_fields_declare_type;
|
405 |
} |
406 |
|
407 |
} |