svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.lib / src / main / java / org / gvsig / fmap / dal / store / simplereader / AutomaticDetectionOfTypes.java @ 47784
History | View | Annotate | Download (13.8 KB)
1 | 47638 | jjdelcerro | package org.gvsig.fmap.dal.store.simplereader; |
---|---|---|---|
2 | |||
3 | import java.io.IOException; |
||
4 | import java.math.BigDecimal; |
||
5 | import java.net.URL; |
||
6 | import java.util.ArrayList; |
||
7 | import java.util.List; |
||
8 | import java.util.Locale; |
||
9 | import org.apache.commons.lang3.StringUtils; |
||
10 | import org.gvsig.fmap.dal.DataTypes; |
||
11 | import org.gvsig.fmap.geom.GeometryCoercionContext; |
||
12 | import org.gvsig.fmap.geom.GeometryLocator; |
||
13 | import org.gvsig.tools.ToolsLocator; |
||
14 | import org.gvsig.tools.dataTypes.DataTypeUtils; |
||
15 | import org.gvsig.tools.dataTypes.DataTypesManager; |
||
16 | import org.gvsig.tools.dataTypes.Coercion; |
||
17 | import org.gvsig.tools.dataTypes.CoercionContext; |
||
18 | import org.gvsig.tools.i18n.I18nManager; |
||
19 | import org.gvsig.tools.task.SimpleTaskStatus; |
||
20 | |||
21 | /**
|
||
22 | *
|
||
23 | * @author jjdelcerro
|
||
24 | */
|
||
25 | public class AutomaticDetectionOfTypes { |
||
26 | |||
27 | public interface Rows { |
||
28 | |||
29 | public List<String> nextRowValues(); |
||
30 | } |
||
31 | |||
32 | public interface DetectedValue { |
||
33 | public int getType(); |
||
34 | public int getDisplaySize(); |
||
35 | public int getPrecision(); |
||
36 | public int getScale(); |
||
37 | public boolean isBlank(); |
||
38 | } |
||
39 | |||
40 | private static class DetectedValueImpl implements DetectedValue { |
||
41 | |||
42 | private int type; |
||
43 | private int displaySize; |
||
44 | private int integerDigits; |
||
45 | private int decimalDigits; |
||
46 | private boolean blank; |
||
47 | |||
48 | @Override
|
||
49 | public int getType() { |
||
50 | return this.type; |
||
51 | } |
||
52 | |||
53 | @Override
|
||
54 | public int getDisplaySize() { |
||
55 | return this.displaySize; |
||
56 | } |
||
57 | |||
58 | @Override
|
||
59 | public int getPrecision() { |
||
60 | return this.decimalDigits + this.integerDigits; |
||
61 | } |
||
62 | |||
63 | @Override
|
||
64 | public int getScale() { |
||
65 | return this.decimalDigits; |
||
66 | } |
||
67 | |||
68 | public boolean isBlank() { |
||
69 | return this.blank; |
||
70 | } |
||
71 | } |
||
72 | |||
73 | private static class PossibleDataType { |
||
74 | |||
75 | public boolean possibleInt = true; |
||
76 | public boolean possibleFloat = true; |
||
77 | public boolean possibleDouble = true; |
||
78 | public boolean possibleDecimal = true; |
||
79 | public boolean possibleLong = true; |
||
80 | public boolean possibleURL = true; |
||
81 | public boolean possibleDate = true; |
||
82 | public boolean possibleTime = true; |
||
83 | public boolean possibleTimestamp = true; |
||
84 | public boolean possibleGeometry = true; |
||
85 | } |
||
86 | |||
87 | private final String filename; |
||
88 | |||
89 | public AutomaticDetectionOfTypes() {
|
||
90 | this("(unknown)"); |
||
91 | } |
||
92 | |||
93 | public AutomaticDetectionOfTypes(String filename) { |
||
94 | this.filename = filename;
|
||
95 | } |
||
96 | |||
97 | private String getFullFileName() { |
||
98 | return this.filename; |
||
99 | } |
||
100 | |||
101 | @SuppressWarnings({"UseSpecificCatch", "ResultOfObjectAllocationIgnored"}) |
||
102 | public DetectedValue[] detect(int columns, |
||
103 | Rows rows, |
||
104 | boolean isFirstLineHeader,
|
||
105 | Locale locale,
|
||
106 | SimpleTaskStatus status |
||
107 | ) throws IOException { |
||
108 | List<PossibleDataType> possibleDataTypes;
|
||
109 | List<DetectedValueImpl> detectedValues = new ArrayList<>(columns); |
||
110 | I18nManager i18n = ToolsLocator.getI18nManager(); |
||
111 | |||
112 | if( status!=null ) { |
||
113 | status.message(i18n.getTranslation("_Types_detection"));
|
||
114 | } |
||
115 | |||
116 | int lineno = 0; |
||
117 | try {
|
||
118 | if (isFirstLineHeader) {
|
||
119 | rows.nextRowValues(); |
||
120 | lineno++; |
||
121 | } |
||
122 | possibleDataTypes = new ArrayList<>(columns); |
||
123 | for (int i = 0; i < columns; i++) { |
||
124 | possibleDataTypes.add(new PossibleDataType());
|
||
125 | detectedValues.add(new DetectedValueImpl());
|
||
126 | } |
||
127 | if (locale == null) { |
||
128 | locale = Locale.getDefault();
|
||
129 | } |
||
130 | DataTypesManager typeManager = ToolsLocator.getDataTypesManager(); |
||
131 | Coercion toDecimal = typeManager.getCoercion(DataTypes.DECIMAL); |
||
132 | Coercion toDouble = typeManager.getCoercion(DataTypes.DOUBLE); |
||
133 | Coercion toFloat = typeManager.getCoercion(DataTypes.FLOAT); |
||
134 | Coercion toDate = typeManager.getCoercion(DataTypes.DATE); |
||
135 | Coercion toTime = typeManager.getCoercion(DataTypes.TIME); |
||
136 | Coercion toTimestamp = typeManager.getCoercion(DataTypes.TIMESTAMP); |
||
137 | Coercion toInt = typeManager.getCoercion(DataTypes.INT); |
||
138 | Coercion toLong = typeManager.getCoercion(DataTypes.LONG); |
||
139 | Coercion toGeom = typeManager.getCoercion(DataTypes.GEOMETRY); |
||
140 | |||
141 | GeometryCoercionContext geometryCoercionContext = GeometryLocator.getGeometryManager().createGeometryCoercionContext(); |
||
142 | geometryCoercionContext.setMode(GeometryCoercionContext.MODE_ONERROR_THROW); |
||
143 | |||
144 | CoercionContext coercionContext = DataTypeUtils.coerceContextLocale(locale); |
||
145 | List<String> row = rows.nextRowValues(); |
||
146 | lineno++; |
||
147 | |||
148 | int detectedValuesSize = detectedValues.size();
|
||
149 | while (row != null) { |
||
150 | if( status!=null ) { |
||
151 | status.incrementCurrentValue(); |
||
152 | if( status.isCancellationRequested() ) {
|
||
153 | status.cancel(); |
||
154 | break;
|
||
155 | } |
||
156 | } |
||
157 | int rowsize = row.size();
|
||
158 | if( rowsize>detectedValuesSize ) {
|
||
159 | for (int i = detectedValuesSize; i < rowsize; i++) { |
||
160 | possibleDataTypes.add(new PossibleDataType());
|
||
161 | detectedValues.add(new DetectedValueImpl());
|
||
162 | } |
||
163 | detectedValuesSize = detectedValues.size(); |
||
164 | } |
||
165 | for (int i = 0; i < rowsize; i++) { |
||
166 | while( possibleDataTypes.size()<row.size() ) {
|
||
167 | possibleDataTypes.add(new PossibleDataType());
|
||
168 | } |
||
169 | String rawvalue = row.get(i);
|
||
170 | if( rawvalue == null ) { |
||
171 | continue;
|
||
172 | } |
||
173 | |||
174 | PossibleDataType possibleDataType = possibleDataTypes.get(i); |
||
175 | DetectedValueImpl detectedValue = detectedValues.get(i); |
||
176 | if( detectedValue.blank ) {
|
||
177 | detectedValue.blank = StringUtils.isBlank(rawvalue); |
||
178 | } |
||
179 | int displaySize = rawvalue.length();
|
||
180 | if( displaySize>detectedValue.displaySize ) {
|
||
181 | detectedValue.displaySize = displaySize; |
||
182 | } |
||
183 | if (possibleDataType.possibleDecimal) {
|
||
184 | try {
|
||
185 | BigDecimal decimal = (BigDecimal) toDecimal.coerce(rawvalue, coercionContext); |
||
186 | possibleDataType.possibleDecimal = true;
|
||
187 | if( decimal.scale() > detectedValue.decimalDigits ) {
|
||
188 | detectedValue.decimalDigits = decimal.scale(); |
||
189 | } |
||
190 | int integerDigits = decimal.precision() - decimal.scale();
|
||
191 | if( integerDigits>detectedValue.integerDigits ) {
|
||
192 | detectedValue.integerDigits = integerDigits; |
||
193 | } |
||
194 | } catch (Exception ex) { |
||
195 | possibleDataType.possibleDecimal = false;
|
||
196 | } |
||
197 | } |
||
198 | if (possibleDataType.possibleDouble) {
|
||
199 | try {
|
||
200 | toDouble.coerce(rawvalue, coercionContext); |
||
201 | possibleDataType.possibleDouble = true;
|
||
202 | } catch (Exception ex) { |
||
203 | possibleDataType.possibleDouble = false;
|
||
204 | } |
||
205 | } |
||
206 | if (possibleDataType.possibleFloat) {
|
||
207 | try {
|
||
208 | toFloat.coerce(rawvalue, coercionContext); |
||
209 | possibleDataType.possibleFloat = true;
|
||
210 | } catch (Exception ex) { |
||
211 | possibleDataType.possibleFloat = false;
|
||
212 | } |
||
213 | } |
||
214 | if (possibleDataType.possibleLong) {
|
||
215 | possibleDataType.possibleLong = isValidLong(rawvalue); |
||
216 | } |
||
217 | if (possibleDataType.possibleInt) {
|
||
218 | possibleDataType.possibleInt = isValidInteger(rawvalue); |
||
219 | } |
||
220 | if (possibleDataType.possibleDate) {
|
||
221 | try {
|
||
222 | toDate.coerce(rawvalue, coercionContext); |
||
223 | possibleDataType.possibleDate = true;
|
||
224 | } catch (Exception ex) { |
||
225 | possibleDataType.possibleDate = false;
|
||
226 | } |
||
227 | } |
||
228 | if (possibleDataType.possibleTime) {
|
||
229 | try {
|
||
230 | toTime.coerce(rawvalue, coercionContext); |
||
231 | possibleDataType.possibleTime = true;
|
||
232 | } catch (Exception ex) { |
||
233 | possibleDataType.possibleTime = false;
|
||
234 | } |
||
235 | } |
||
236 | if (possibleDataType.possibleTimestamp) {
|
||
237 | try {
|
||
238 | toTimestamp.coerce(rawvalue, coercionContext); |
||
239 | possibleDataType.possibleTimestamp = true;
|
||
240 | } catch (Exception ex) { |
||
241 | possibleDataType.possibleTimestamp = false;
|
||
242 | } |
||
243 | } |
||
244 | if (possibleDataType.possibleURL) {
|
||
245 | try {
|
||
246 | new URL((String) rawvalue); |
||
247 | possibleDataType.possibleURL = true;
|
||
248 | } catch (Exception ex) { |
||
249 | possibleDataType.possibleURL = false;
|
||
250 | } |
||
251 | } |
||
252 | |||
253 | if (possibleDataType.possibleGeometry) {
|
||
254 | try {
|
||
255 | toGeom.coerce((String) rawvalue, geometryCoercionContext);
|
||
256 | possibleDataType.possibleGeometry = true;
|
||
257 | } catch (Exception ex) { |
||
258 | possibleDataType.possibleGeometry = false;
|
||
259 | } |
||
260 | } |
||
261 | } |
||
262 | row = rows.nextRowValues(); |
||
263 | lineno++; |
||
264 | } |
||
265 | if( status!=null ) { |
||
266 | status.setRangeOfValues(0, lineno);
|
||
267 | status.setCurValue(0);
|
||
268 | } |
||
269 | int n = 0; |
||
270 | for (PossibleDataType possibleDataType : possibleDataTypes) {
|
||
271 | if (possibleDataType.possibleInt) {
|
||
272 | detectedValues.get(n++).type = DataTypes.INT; |
||
273 | continue;
|
||
274 | } |
||
275 | if (possibleDataType.possibleLong) {
|
||
276 | detectedValues.get(n++).type = DataTypes.LONG; |
||
277 | continue;
|
||
278 | } |
||
279 | if (possibleDataType.possibleDecimal) {
|
||
280 | // Preferimos un Decimal que un Float/Double
|
||
281 | detectedValues.get(n++).type = DataTypes.DECIMAL; |
||
282 | continue;
|
||
283 | } |
||
284 | if (possibleDataType.possibleFloat) {
|
||
285 | // Forzamos los float a double para evitar perder precision
|
||
286 | detectedValues.get(n++).type = DataTypes.DOUBLE; |
||
287 | continue;
|
||
288 | } |
||
289 | if (possibleDataType.possibleDouble) {
|
||
290 | detectedValues.get(n++).type = DataTypes.DOUBLE; |
||
291 | continue;
|
||
292 | } |
||
293 | if (possibleDataType.possibleURL) {
|
||
294 | detectedValues.get(n++).type = DataTypes.URL; |
||
295 | continue;
|
||
296 | } |
||
297 | if (possibleDataType.possibleDate) {
|
||
298 | detectedValues.get(n++).type = DataTypes.DATE; |
||
299 | continue;
|
||
300 | } |
||
301 | if (possibleDataType.possibleTime) {
|
||
302 | detectedValues.get(n++).type = DataTypes.TIME; |
||
303 | continue;
|
||
304 | } |
||
305 | if (possibleDataType.possibleTimestamp) {
|
||
306 | detectedValues.get(n++).type = DataTypes.TIMESTAMP; |
||
307 | continue;
|
||
308 | } |
||
309 | if (possibleDataType.possibleGeometry) {
|
||
310 | detectedValues.get(n++).type = DataTypes.GEOMETRY; |
||
311 | continue;
|
||
312 | } |
||
313 | detectedValues.get(n++).type = DataTypes.STRING; |
||
314 | } |
||
315 | } catch (Throwable ex) { |
||
316 | status.abort(); |
||
317 | throw new RuntimeException("Problems reading file '" + this.getFullFileName() + "' near line " + lineno + ".", ex); |
||
318 | } |
||
319 | DetectedValue[] r = detectedValues.toArray(new DetectedValue[detectedValues.size()]); |
||
320 | return r;
|
||
321 | } |
||
322 | |||
323 | @SuppressWarnings("UseSpecificCatch") |
||
324 | private boolean isValidLong(String s) { |
||
325 | if (s == null) { |
||
326 | return true; |
||
327 | } |
||
328 | s = s.trim().toLowerCase(); |
||
329 | if (s.isEmpty()) {
|
||
330 | return true; |
||
331 | } |
||
332 | try {
|
||
333 | if (s.startsWith("0x")) { |
||
334 | Long.valueOf(s.substring(2), 16); |
||
335 | } else {
|
||
336 | Long.valueOf(s);
|
||
337 | } |
||
338 | return true; |
||
339 | } catch (Exception ex) { |
||
340 | return false; |
||
341 | } |
||
342 | } |
||
343 | |||
344 | @SuppressWarnings("UseSpecificCatch") |
||
345 | private boolean isValidInteger(String s) { |
||
346 | if (s == null) { |
||
347 | return true; |
||
348 | } |
||
349 | s = s.trim().toLowerCase(); |
||
350 | if (s.isEmpty()) {
|
||
351 | return true; |
||
352 | } |
||
353 | try {
|
||
354 | if (s.startsWith("0x")) { |
||
355 | Integer.valueOf(s.substring(2), 16); |
||
356 | } else {
|
||
357 | Integer.valueOf(s);
|
||
358 | } |
||
359 | return true; |
||
360 | } catch (Exception ex) { |
||
361 | return false; |
||
362 | } |
||
363 | } |
||
364 | |||
365 | } |