svn-gvsig-desktop / trunk / extensions / extNormalization / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 22982
History | View | Annotate | Download (14.1 KB)
1 |
/* gvSIG. Geographic Information System of the Valencian Government
|
---|---|
2 |
*
|
3 |
* Copyright (C) 2007-2008 Infrastructures and Transports Department
|
4 |
* of the Valencian Government (CIT)
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 2
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/*
|
24 |
* AUTHORS (In addition to CIT):
|
25 |
* 2008 Prodevelop S.L. main development
|
26 |
*/
|
27 |
|
28 |
package org.gvsig.normalization.operations; |
29 |
|
30 |
import java.text.DecimalFormat; |
31 |
import java.text.DecimalFormatSymbols; |
32 |
import java.util.ArrayList; |
33 |
import java.util.List; |
34 |
import java.util.Locale; |
35 |
|
36 |
import javax.swing.event.ChangeEvent; |
37 |
import javax.swing.event.ChangeListener; |
38 |
|
39 |
import org.apache.log4j.Logger; |
40 |
import org.gvsig.patterns.normalization.Datevalue; |
41 |
import org.gvsig.patterns.normalization.Decimalvalue; |
42 |
import org.gvsig.patterns.normalization.Element; |
43 |
import org.gvsig.patterns.normalization.Fieldtype; |
44 |
import org.gvsig.patterns.normalization.Integervalue; |
45 |
import org.gvsig.patterns.normalization.Patternnormalization; |
46 |
import org.gvsig.patterns.normalization.Stringvalue; |
47 |
|
48 |
/**
|
49 |
 * This class tokenizes strings
|
50 |
*
|
51 |
* @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
|
52 |
* @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
|
53 |
*
|
54 |
*/
|
55 |
|
56 |
public class NormAlgorithm { |
57 |
|
58 |
// ATTRIBUTES
|
59 |
|
60 |
private static final Logger log = Logger.getLogger(NormAlgorithm.class); |
61 |
|
62 |
private Patternnormalization pat;
|
63 |
|
64 |
private int numFields; |
65 |
|
66 |
private int row; |
67 |
|
68 |
private Element[] aad; |
69 |
|
70 |
@SuppressWarnings("unchecked") |
71 |
private ArrayList listeners = new ArrayList(); |
72 |
|
73 |
/**
 * Constructor.
 *
 * Stores the pattern, reads its array of elements and keeps the number of
 * elements as the number of fields.
 *
 * @param _pat
 *            Normalization pattern
 */
public NormAlgorithm(Patternnormalization _pat) {
    this.pat = _pat;
    this.aad = _pat.getArrayElements();
    this.numFields = this.aad.length;
}
84 |
|
85 |
// METHODS
|
86 |
|
87 |
/**
|
88 |
* This method cuts a chain in several parts and they are returned in a List
|
89 |
*
|
90 |
* @param _chain
|
91 |
* strings
|
92 |
* @return list with the strings tokenized
|
93 |
*/
|
94 |
@SuppressWarnings("unchecked") |
95 |
public List splitChain(String _chain) { |
96 |
String preChain = _chain;
|
97 |
String postChain = ""; |
98 |
List subStrings = new ArrayList(); |
99 |
|
100 |
// EXCEPTIONAL CASES
|
101 |
if(_chain == null){ |
102 |
return subStrings;
|
103 |
} |
104 |
|
105 |
else if (_chain.compareToIgnoreCase("") == 0) { |
106 |
subStrings.add(_chain); |
107 |
return subStrings;
|
108 |
} |
109 |
|
110 |
|
111 |
// NORMAL CASE
|
112 |
else {
|
113 |
int fw = 0; |
114 |
int init = 0; |
115 |
String subChain = ""; |
116 |
|
117 |
for (int i = 0; i < numFields; i++) { |
118 |
|
119 |
if (preChain.length() == 0) { |
120 |
return subStrings;
|
121 |
} else {
|
122 |
|
123 |
fw = aad[i].getFieldwidth(); |
124 |
// Cut chain with fixed width
|
125 |
if (fw > 0 && preChain.length() > fw) { |
126 |
subChain = preChain.substring(init, fw); |
127 |
subStrings.add(subChain); |
128 |
postChain = preChain.substring(fw); |
129 |
preChain = postChain; |
130 |
|
131 |
} else if (fw > 0 && preChain.length() <= fw) { |
132 |
subStrings.add(preChain); |
133 |
return subStrings;
|
134 |
} |
135 |
// Cut chain with separators
|
136 |
else {
|
137 |
// Load specific separators
|
138 |
List separators = loadSpecificSeparators(aad[i]);
|
139 |
boolean join = withJoinSeparators(aad[i]);
|
140 |
// Search the first delimiter in the chain
|
141 |
int posi = calculatePosition(separators, preChain);
|
142 |
int tamSep = calculateSizeSep(separators, preChain);
|
143 |
// Firsts elements
|
144 |
if (i < numFields - 1) { |
145 |
|
146 |
if (join) {
|
147 |
while (posi == 0) { |
148 |
preChain = deleteFirst(preChain); |
149 |
posi = calculatePosition(separators, |
150 |
preChain); |
151 |
if(preChain.length() == 0){ |
152 |
break;
|
153 |
} |
154 |
} |
155 |
subChain = preChain.substring(0, posi);
|
156 |
try {
|
157 |
postChain = preChain.substring(posi |
158 |
+ tamSep); |
159 |
} catch (Exception e) { |
160 |
postChain = "";
|
161 |
} |
162 |
subStrings.add(subChain); |
163 |
preChain = postChain; |
164 |
} else {
|
165 |
subChain = preChain.substring(0, posi);
|
166 |
postChain = preChain.substring(posi + tamSep); |
167 |
subStrings.add(subChain); |
168 |
preChain = postChain; |
169 |
} |
170 |
|
171 |
} |
172 |
// Last element
|
173 |
else {
|
174 |
subStrings.add(preChain); |
175 |
} |
176 |
} |
177 |
} |
178 |
} |
179 |
} |
180 |
return subStrings;
|
181 |
} |
182 |
|
183 |
/**
|
184 |
* This method cuts a chain in several parts from separators
|
185 |
*
|
186 |
* @param chain
|
187 |
* string
|
188 |
* @param fields
|
189 |
* fields number
|
190 |
* @param separators
|
191 |
* array of characters
|
192 |
* @param joinDelimiters
|
193 |
* with or without joinDelimiters
|
194 |
* @return list with the strings tokenized
|
195 |
*/
|
196 |
@SuppressWarnings("unchecked") |
197 |
public static List splitChainBySeparators(String chain, int fields, |
198 |
String[] separators, boolean joinDelimiters) { |
199 |
|
200 |
List subStrings = new ArrayList(); |
201 |
int posTemp = -1; |
202 |
String separator;
|
203 |
String chain2 = chain;
|
204 |
int campos = fields;
|
205 |
|
206 |
// EXCEPTIONAL CASES
|
207 |
if (chain.compareToIgnoreCase("") == 0 || campos == 0 |
208 |
|| separators.length < 1) {
|
209 |
subStrings.add(chain); |
210 |
return subStrings;
|
211 |
} |
212 |
|
213 |
// NORMAL CASE
|
214 |
else {
|
215 |
|
216 |
// Only (parts-1) loops
|
217 |
for (int i = 0; i < (campos - 1); i++) { |
218 |
int posi = Integer.MAX_VALUE; |
219 |
String firstChain;
|
220 |
for (int j = 0; j < separators.length; j++) { |
221 |
separator = separators[j]; |
222 |
posTemp = chain2.indexOf(separator); |
223 |
if (posTemp != -1 && posTemp < posi) { |
224 |
posi = posTemp; |
225 |
} |
226 |
posTemp = -1;
|
227 |
} |
228 |
if (posi == 0 && joinDelimiters) { |
229 |
campos++; |
230 |
chain2 = chain2.substring(posi + 1);
|
231 |
} else {
|
232 |
firstChain = chain2.substring(0, posi);
|
233 |
chain2 = chain2.substring(posi + 1);
|
234 |
subStrings.add(firstChain); |
235 |
// In the last loop add the first chain and the rest of
|
236 |
// chain
|
237 |
if (i == (campos - 2)) { |
238 |
subStrings.add(chain2); |
239 |
} |
240 |
} |
241 |
} |
242 |
return subStrings;
|
243 |
} |
244 |
} |
245 |
|
246 |
/**
|
247 |
* This method cuts a chain in several parts from fixed width
|
248 |
*
|
249 |
* @param chain
|
250 |
* string
|
251 |
* @param fieldWidth
|
252 |
* array with fields widths
|
253 |
* @return list with the strings tokenized
|
254 |
*/
|
255 |
@SuppressWarnings("unchecked") |
256 |
public static List splitChainByFixedWidth(String chain, int[] fieldWidth) { |
257 |
|
258 |
List subStrings = new ArrayList(); |
259 |
int elements = fieldWidth.length;
|
260 |
String subChain;
|
261 |
int inicio = 0; |
262 |
int fin = 0; |
263 |
|
264 |
// EXCEPTIONAL CASES
|
265 |
if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) { |
266 |
subStrings.add(chain); |
267 |
return subStrings;
|
268 |
} |
269 |
|
270 |
// NORMAL CASE
|
271 |
else {
|
272 |
for (int i = 0; i < elements; i++) { |
273 |
fin = fin + fieldWidth[i]; |
274 |
subChain = chain.substring(inicio, fin); |
275 |
subStrings.add(subChain); |
276 |
inicio = fin; |
277 |
} |
278 |
return subStrings;
|
279 |
} |
280 |
} |
281 |
|
282 |
/**
|
283 |
* This method filters the split chains with the in-separators
|
284 |
*
|
285 |
* @param chains
|
286 |
* strings of the one row
|
287 |
* @return strings filtered by type
|
288 |
*/
|
289 |
@SuppressWarnings("unchecked") |
290 |
public List filterSplitChains(List chains) { |
291 |
|
292 |
DecimalFormat numForm = (DecimalFormat) DecimalFormat |
293 |
.getInstance(Locale.getDefault());
|
294 |
DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
|
295 |
|
296 |
Fieldtype nft = null;
|
297 |
|
298 |
String decsep;
|
299 |
char cdecsep;
|
300 |
String thosep;
|
301 |
char cthosep;
|
302 |
String txsep;
|
303 |
|
304 |
List postChain = new ArrayList(); |
305 |
String aux = ""; |
306 |
|
307 |
for (int i = 0; i < numFields; i++) { |
308 |
|
309 |
nft = aad[i].getFieldtype(); |
310 |
|
311 |
decsep = aad[i].getInfieldseparators().getDecimalseparator().trim(); |
312 |
cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep |
313 |
.charAt(0);
|
314 |
thosep = aad[i].getInfieldseparators().getThousandseparator().trim(); |
315 |
cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep |
316 |
.charAt(0);
|
317 |
|
318 |
txsep = aad[i].getInfieldseparators().getTextseparator().trim(); |
319 |
|
320 |
if (txsep.compareToIgnoreCase("\"") == 0) { |
321 |
txsep = "\"";
|
322 |
} |
323 |
|
324 |
simb.setDecimalSeparator(cdecsep); |
325 |
try {
|
326 |
simb.setGroupingSeparator(cthosep); |
327 |
} catch (RuntimeException e1) { |
328 |
log.error("Error setting the group separator", e1);
|
329 |
} |
330 |
// Fill fields empties
|
331 |
if (chains.size() < numFields) {
|
332 |
|
333 |
for (int j = chains.size(); j < numFields; j++) { |
334 |
chains.add(j, "");
|
335 |
} |
336 |
} |
337 |
|
338 |
if (((Integervalue) nft.getIntegervalue()) != null |
339 |
|| ((Decimalvalue) nft.getDecimalvalue()) != null) {
|
340 |
numForm.setDecimalFormatSymbols(simb); |
341 |
try {
|
342 |
String cadena = ((String) chains.get(i)).trim(); |
343 |
int num = cadena.length();
|
344 |
boolean comproba = test(cadena, cdecsep, cthosep);
|
345 |
if (num > 0 && comproba) { |
346 |
aux = (numForm.parse(cadena)).toString().trim(); |
347 |
} else {
|
348 |
aux = "";
|
349 |
} |
350 |
} catch (Exception e) { |
351 |
aux = "";
|
352 |
} |
353 |
} |
354 |
|
355 |
// Field type (VARCHAR)
|
356 |
if (((Stringvalue) nft.getStringvalue()) != null) { |
357 |
String cadena = (String) chains.get(i); |
358 |
if (cadena != null) { |
359 |
aux = cadena.replace(txsep, "");
|
360 |
} else {
|
361 |
aux = "";
|
362 |
} |
363 |
} |
364 |
|
365 |
// Field type (DATE)
|
366 |
if (((Datevalue) nft.getDatevalue()) != null) { |
367 |
String cadena = (String) chains.get(i); |
368 |
if (cadena.length() > 0 || cadena != null) { |
369 |
aux = cadena.replace(txsep, "");
|
370 |
} else {
|
371 |
aux = "";
|
372 |
} |
373 |
} |
374 |
postChain.add(aux); |
375 |
} |
376 |
return postChain;
|
377 |
|
378 |
} |
379 |
|
380 |
/**
|
381 |
* This method registers the listeners
|
382 |
*
|
383 |
* @param l
|
384 |
* listener
|
385 |
*/
|
386 |
@SuppressWarnings("unchecked") |
387 |
public void registerListener(ChangeListener l) { |
388 |
this.listeners.add(l);
|
389 |
} |
390 |
|
391 |
/**
|
392 |
* This method remove the listener registred
|
393 |
*
|
394 |
* @param l
|
395 |
* listener
|
396 |
*/
|
397 |
public void removeListener(ChangeListener l) { |
398 |
this.listeners.remove(l);
|
399 |
} |
400 |
|
401 |
/**
|
402 |
* This method removes all listeners
|
403 |
*/
|
404 |
public void removeAllListeners() { |
405 |
this.listeners.clear();
|
406 |
} |
407 |
|
408 |
/**
|
409 |
*
|
410 |
* @param evt
|
411 |
* event
|
412 |
*/
|
413 |
public void update(ChangeEvent evt) { |
414 |
|
415 |
for (int i = 0; i < listeners.size(); i++) { |
416 |
((ChangeListener) listeners.get(i)).stateChanged(evt);
|
417 |
} |
418 |
} |
419 |
|
420 |
/**
|
421 |
* Add message
|
422 |
*
|
423 |
* @param message
|
424 |
*/
|
425 |
public void update(String message) { |
426 |
ChangeEvent evt = new ChangeEvent(message); |
427 |
update(evt); |
428 |
} |
429 |
|
430 |
/**
|
431 |
* Set the row
|
432 |
*
|
433 |
* @param _row
|
434 |
*/
|
435 |
public void setRow(int _row) { |
436 |
row = _row; |
437 |
} |
438 |
|
439 |
/**
|
440 |
* This method loads the join separators attribute of one Element
|
441 |
*
|
442 |
* @param ad
|
443 |
* @return with or without joinSeparators
|
444 |
*/
|
445 |
private boolean withJoinSeparators(Element ad) { |
446 |
return ad.getFieldseparator().getJoinsep();
|
447 |
} |
448 |
|
449 |
/**
|
450 |
* This method deletes the first element of one substring
|
451 |
*
|
452 |
* @param chain
|
453 |
* initial string
|
454 |
* @return string
|
455 |
*/
|
456 |
private String deleteFirst(String chain) { |
457 |
String del = chain.substring(1); |
458 |
return del;
|
459 |
} |
460 |
|
461 |
/**
|
462 |
* This method gets the first position in the string of the separators
|
463 |
* group.
|
464 |
*
|
465 |
* @param separators
|
466 |
* separators characters list
|
467 |
* @param preChain
|
468 |
* initial string
|
469 |
* @return
|
470 |
*/
|
471 |
@SuppressWarnings("unchecked") |
472 |
private int calculatePosition(List separators, String preChain) { |
473 |
|
474 |
String separator;
|
475 |
int posTemp = -1; |
476 |
int posi = Integer.MAX_VALUE; |
477 |
for (int j = 0; j < separators.size(); j++) { |
478 |
separator = (String) separators.get(j);
|
479 |
posTemp = preChain.indexOf(separator); |
480 |
if (posTemp != -1 && posTemp < posi) { |
481 |
posi = posTemp; |
482 |
} |
483 |
posTemp = -1;
|
484 |
} |
485 |
if (posi > preChain.length()) {
|
486 |
posi = preChain.length(); |
487 |
} |
488 |
|
489 |
return posi;
|
490 |
} |
491 |
|
492 |
/**
|
493 |
* This method calculates the number of elements of one separators
|
494 |
*
|
495 |
* @param separators
|
496 |
* @param preChain
|
497 |
* @return number of elements
|
498 |
*/
|
499 |
|
500 |
@SuppressWarnings("unchecked") |
501 |
private int calculateSizeSep(List separators, String preChain) { |
502 |
|
503 |
String separator;
|
504 |
int posTemp = -1; |
505 |
int posi = Integer.MAX_VALUE; |
506 |
String sep = ""; |
507 |
for (int j = 0; j < separators.size(); j++) { |
508 |
separator = (String) separators.get(j);
|
509 |
posTemp = preChain.indexOf(separator); |
510 |
if (posTemp != -1 && posTemp < posi) { |
511 |
posi = posTemp; |
512 |
sep = separator; |
513 |
} |
514 |
posTemp = -1;
|
515 |
} |
516 |
|
517 |
return sep.length();
|
518 |
} |
519 |
|
520 |
/**
|
521 |
* This method loads the specifics separators of one Element
|
522 |
*
|
523 |
* @param adrElem
|
524 |
* @return separators list
|
525 |
*/
|
526 |
@SuppressWarnings("unchecked") |
527 |
private List loadSpecificSeparators(Element adrElem) { |
528 |
List separators = new ArrayList(); |
529 |
if (adrElem.getFieldseparator().getColonsep()) {
|
530 |
separators.add(",");
|
531 |
} |
532 |
if (adrElem.getFieldseparator().getSemicolonsep()) {
|
533 |
separators.add(";");
|
534 |
} |
535 |
if (adrElem.getFieldseparator().getTabsep()) {
|
536 |
separators.add("\t");
|
537 |
} |
538 |
if (adrElem.getFieldseparator().getSpacesep()) {
|
539 |
separators.add(" ");
|
540 |
} |
541 |
if (adrElem.getFieldseparator().getOthersep() != null |
542 |
&& adrElem.getFieldseparator().getOthersep() |
543 |
.compareToIgnoreCase("") != 0) { |
544 |
separators.add((String) adrElem.getFieldseparator().getOthersep());
|
545 |
} |
546 |
|
547 |
return separators;
|
548 |
} |
549 |
|
550 |
/**
|
551 |
* This method tests the numbers format
|
552 |
*
|
553 |
* @param str
|
554 |
* @param dec
|
555 |
* @param sep
|
556 |
* @return true if there aren't other characters
|
557 |
*/
|
558 |
private boolean test(String str, char dec, char sep) { |
559 |
String str2 = str.replaceAll("[0-9]", ""); |
560 |
str2 = str2.replaceAll("-", ""); |
561 |
|
562 |
String str3 = str2;
|
563 |
if (str2.indexOf(dec) >= 0) { |
564 |
int ind = str2.indexOf(String.valueOf(dec)); |
565 |
str3 = str2.substring(0, ind)
|
566 |
+ str2.substring(ind + 1, str2.length());
|
567 |
} |
568 |
String str4 = str3;
|
569 |
if (str3.indexOf(sep) >= 0) { |
570 |
int ind = str3.indexOf(String.valueOf(sep)); |
571 |
str4 = str3.substring(0, ind)
|
572 |
+ str3.substring(ind + 1, str3.length());
|
573 |
} |
574 |
|
575 |
return str4.length() == 0; |
576 |
} |
577 |
|
578 |
} |