root / trunk / extensions / extNormalization / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 22823
History | View | Annotate | Download (14.3 KB)
1 |
/* gvSIG. Geographic Information System of the Valencian Government
|
---|---|
2 |
*
|
3 |
* Copyright (C) 2007-2008 Infrastructures and Transports Department
|
4 |
* of the Valencian Government (CIT)
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 2
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/*
|
24 |
* AUTHORS (In addition to CIT):
|
25 |
* 2008 Prodevelop S.L. main development
|
26 |
*/
|
27 |
|
28 |
package org.gvsig.normalization.operations; |
29 |
|
30 |
import java.text.DecimalFormat; |
31 |
import java.text.DecimalFormatSymbols; |
32 |
import java.util.ArrayList; |
33 |
import java.util.List; |
34 |
import java.util.Locale; |
35 |
|
36 |
import javax.swing.event.ChangeEvent; |
37 |
import javax.swing.event.ChangeListener; |
38 |
|
39 |
import org.apache.log4j.Logger; |
40 |
import org.gvsig.geocoding.patterns.normalization.Addresselement; |
41 |
import org.gvsig.geocoding.patterns.normalization.Datevalue; |
42 |
import org.gvsig.geocoding.patterns.normalization.Decimalvalue; |
43 |
import org.gvsig.geocoding.patterns.normalization.Integervalue; |
44 |
import org.gvsig.geocoding.patterns.normalization.Newfieldtype; |
45 |
import org.gvsig.geocoding.patterns.normalization.Patternnormalization; |
46 |
import org.gvsig.geocoding.patterns.normalization.Stringvalue; |
47 |
|
48 |
/**
|
49 |
* This class tokenizes strings
|
50 |
*
|
51 |
* @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
|
52 |
* @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
|
53 |
*
|
54 |
*/
|
55 |
|
56 |
public class NormAlgorithm { |
57 |
|
58 |
// ATTRIBUTES
|
59 |
|
60 |
private static final Logger log = Logger.getLogger(NormAlgorithm.class); |
61 |
|
62 |
private Patternnormalization pat;
|
63 |
|
64 |
private int numFields; |
65 |
|
66 |
private int row; |
67 |
|
68 |
private Addresselement[] aad; |
69 |
|
70 |
@SuppressWarnings("unchecked") |
71 |
private ArrayList listeners = new ArrayList(); |
72 |
|
73 |
/**
 * Creates the algorithm for a normalization pattern.
 * <p>
 * The address elements of the pattern are kept in an array and their count
 * becomes the number of fields handled by this instance.
 *
 * @param _pat
 *            normalization pattern; it is dereferenced here, so it must not
 *            be {@code null}
 */
public NormAlgorithm(Patternnormalization _pat) {
    this.pat = _pat;
    this.aad = _pat.getAddresselements().getAddresselement();
    this.numFields = this.aad.length;
}
85 |
|
86 |
// METHODS
|
87 |
|
88 |
/**
|
89 |
* This method cuts a chain in several parts and they are returned in a List
|
90 |
*
|
91 |
* @param _chain
|
92 |
* strings
|
93 |
* @return list with the strings tokenized
|
94 |
*/
|
95 |
@SuppressWarnings("unchecked") |
96 |
public List splitChain(String _chain) { |
97 |
String preChain = _chain;
|
98 |
String postChain = ""; |
99 |
List subStrings = new ArrayList(); |
100 |
|
101 |
// EXCEPTIONAL CASES
|
102 |
if(_chain == null){ |
103 |
return subStrings;
|
104 |
} |
105 |
|
106 |
else if (_chain.compareToIgnoreCase("") == 0) { |
107 |
subStrings.add(_chain); |
108 |
return subStrings;
|
109 |
} |
110 |
|
111 |
|
112 |
// NORMAL CASE
|
113 |
else {
|
114 |
int fw = 0; |
115 |
int init = 0; |
116 |
String subChain = ""; |
117 |
|
118 |
for (int i = 0; i < numFields; i++) { |
119 |
|
120 |
if (preChain.length() == 0) { |
121 |
return subStrings;
|
122 |
} else {
|
123 |
|
124 |
fw = aad[i].getFieldwidth(); |
125 |
// Cut chain with fixed width
|
126 |
if (fw > 0 && preChain.length() > fw) { |
127 |
subChain = preChain.substring(init, fw); |
128 |
subStrings.add(subChain); |
129 |
postChain = preChain.substring(fw); |
130 |
preChain = postChain; |
131 |
|
132 |
} else if (fw > 0 && preChain.length() <= fw) { |
133 |
subStrings.add(preChain); |
134 |
return subStrings;
|
135 |
} |
136 |
// Cut chain with separators
|
137 |
else {
|
138 |
// Load specific separators
|
139 |
List separators = loadSpecificSeparators(aad[i]);
|
140 |
boolean join = withJoinSeparators(aad[i]);
|
141 |
// Search the first delimiter in the chain
|
142 |
int posi = calculatePosition(separators, preChain);
|
143 |
int tamSep = calculateSizeSep(separators, preChain);
|
144 |
// Firsts elements
|
145 |
if (i < numFields - 1) { |
146 |
|
147 |
if (join) {
|
148 |
while (posi == 0) { |
149 |
preChain = deleteFirst(preChain); |
150 |
posi = calculatePosition(separators, |
151 |
preChain); |
152 |
if(preChain.length() == 0){ |
153 |
break;
|
154 |
} |
155 |
} |
156 |
subChain = preChain.substring(0, posi);
|
157 |
try {
|
158 |
postChain = preChain.substring(posi |
159 |
+ tamSep); |
160 |
} catch (Exception e) { |
161 |
postChain = "";
|
162 |
} |
163 |
subStrings.add(subChain); |
164 |
preChain = postChain; |
165 |
} else {
|
166 |
subChain = preChain.substring(0, posi);
|
167 |
postChain = preChain.substring(posi + tamSep); |
168 |
subStrings.add(subChain); |
169 |
preChain = postChain; |
170 |
} |
171 |
|
172 |
} |
173 |
// Last element
|
174 |
else {
|
175 |
subStrings.add(preChain); |
176 |
} |
177 |
} |
178 |
} |
179 |
} |
180 |
} |
181 |
return subStrings;
|
182 |
} |
183 |
|
184 |
/**
|
185 |
* This method cuts a chain in several parts from separators
|
186 |
*
|
187 |
* @param chain
|
188 |
* string
|
189 |
* @param fields
|
190 |
* fields number
|
191 |
* @param separators
|
192 |
* array of characters
|
193 |
* @param joinDelimiters
|
194 |
* with or without joinDelimiters
|
195 |
* @return list with the strings tokenized
|
196 |
*/
|
197 |
@SuppressWarnings("unchecked") |
198 |
public static List splitChainBySeparators(String chain, int fields, |
199 |
String[] separators, boolean joinDelimiters) { |
200 |
|
201 |
List subStrings = new ArrayList(); |
202 |
int posTemp = -1; |
203 |
String separator;
|
204 |
String chain2 = chain;
|
205 |
int campos = fields;
|
206 |
|
207 |
// EXCEPTIONAL CASES
|
208 |
if (chain.compareToIgnoreCase("") == 0 || campos == 0 |
209 |
|| separators.length < 1) {
|
210 |
subStrings.add(chain); |
211 |
return subStrings;
|
212 |
} |
213 |
|
214 |
// NORMAL CASE
|
215 |
else {
|
216 |
|
217 |
// Only (parts-1) loops
|
218 |
for (int i = 0; i < (campos - 1); i++) { |
219 |
int posi = Integer.MAX_VALUE; |
220 |
String firstChain;
|
221 |
for (int j = 0; j < separators.length; j++) { |
222 |
separator = separators[j]; |
223 |
posTemp = chain2.indexOf(separator); |
224 |
if (posTemp != -1 && posTemp < posi) { |
225 |
posi = posTemp; |
226 |
} |
227 |
posTemp = -1;
|
228 |
} |
229 |
if (posi == 0 && joinDelimiters) { |
230 |
campos++; |
231 |
chain2 = chain2.substring(posi + 1);
|
232 |
} else {
|
233 |
firstChain = chain2.substring(0, posi);
|
234 |
chain2 = chain2.substring(posi + 1);
|
235 |
subStrings.add(firstChain); |
236 |
// In the last loop add the first chain and the rest of
|
237 |
// chain
|
238 |
if (i == (campos - 2)) { |
239 |
subStrings.add(chain2); |
240 |
} |
241 |
} |
242 |
} |
243 |
return subStrings;
|
244 |
} |
245 |
} |
246 |
|
247 |
/**
|
248 |
* This method cuts a chain in several parts from fixed width
|
249 |
*
|
250 |
* @param chain
|
251 |
* string
|
252 |
* @param fieldWidth
|
253 |
* array with fields widths
|
254 |
* @return list with the strings tokenized
|
255 |
*/
|
256 |
@SuppressWarnings("unchecked") |
257 |
public static List splitChainByFixedWidth(String chain, int[] fieldWidth) { |
258 |
|
259 |
List subStrings = new ArrayList(); |
260 |
int elements = fieldWidth.length;
|
261 |
String subChain;
|
262 |
int inicio = 0; |
263 |
int fin = 0; |
264 |
|
265 |
// EXCEPTIONAL CASES
|
266 |
if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) { |
267 |
subStrings.add(chain); |
268 |
return subStrings;
|
269 |
} |
270 |
|
271 |
// NORMAL CASE
|
272 |
else {
|
273 |
for (int i = 0; i < elements; i++) { |
274 |
fin = fin + fieldWidth[i]; |
275 |
subChain = chain.substring(inicio, fin); |
276 |
subStrings.add(subChain); |
277 |
inicio = fin; |
278 |
} |
279 |
return subStrings;
|
280 |
} |
281 |
} |
282 |
|
283 |
/**
|
284 |
* This method filters the split chains with the in-separators
|
285 |
*
|
286 |
* @param chains
|
287 |
* strings of the one row
|
288 |
* @return strings filtered by type
|
289 |
*/
|
290 |
@SuppressWarnings("unchecked") |
291 |
public List filterSplitChains(List chains) { |
292 |
|
293 |
DecimalFormat numForm = (DecimalFormat) DecimalFormat |
294 |
.getInstance(Locale.getDefault());
|
295 |
DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
|
296 |
|
297 |
Newfieldtype nft = null;
|
298 |
|
299 |
String decsep;
|
300 |
char cdecsep;
|
301 |
String thosep;
|
302 |
char cthosep;
|
303 |
String txsep;
|
304 |
|
305 |
List postChain = new ArrayList(); |
306 |
String aux = ""; |
307 |
|
308 |
for (int i = 0; i < numFields; i++) { |
309 |
|
310 |
nft = aad[i].getNewfieldtype(); |
311 |
|
312 |
decsep = aad[i].getInfieldseparators().getDecimalseparator() |
313 |
.getValue().trim(); |
314 |
cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep |
315 |
.charAt(0);
|
316 |
thosep = aad[i].getInfieldseparators().getThousandseparator() |
317 |
.getValue().trim(); |
318 |
cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep |
319 |
.charAt(0);
|
320 |
|
321 |
txsep = aad[i].getInfieldseparators().getTextseparator().getValue() |
322 |
.trim(); |
323 |
|
324 |
if (txsep.compareToIgnoreCase("\"") == 0) { |
325 |
txsep = "\"";
|
326 |
} |
327 |
|
328 |
simb.setDecimalSeparator(cdecsep); |
329 |
try {
|
330 |
simb.setGroupingSeparator(cthosep); |
331 |
} catch (RuntimeException e1) { |
332 |
log.error("Error setting the group separator", e1);
|
333 |
} |
334 |
// Fill fields empties
|
335 |
if (chains.size() < numFields) {
|
336 |
|
337 |
for (int j = chains.size(); j < numFields; j++) { |
338 |
chains.add(j, "");
|
339 |
} |
340 |
} |
341 |
|
342 |
if (((Integervalue) nft.getIntegervalue()) != null |
343 |
|| ((Decimalvalue) nft.getDecimalvalue()) != null) {
|
344 |
numForm.setDecimalFormatSymbols(simb); |
345 |
try {
|
346 |
String cadena = (String) chains.get(i); |
347 |
int num = cadena.length();
|
348 |
boolean comproba = test(cadena, cdecsep, cthosep);
|
349 |
if (num > 0 && comproba) { |
350 |
aux = (numForm.parse(cadena)).toString().trim(); |
351 |
} else {
|
352 |
aux = "";
|
353 |
} |
354 |
} catch (Exception e) { |
355 |
aux = "";
|
356 |
} |
357 |
} |
358 |
|
359 |
// Field type (VARCHAR)
|
360 |
if (((Stringvalue) nft.getStringvalue()) != null) { |
361 |
String cadena = (String) chains.get(i); |
362 |
if (cadena != null) { |
363 |
aux = cadena.replace(txsep, "");
|
364 |
} else {
|
365 |
aux = "";
|
366 |
} |
367 |
} |
368 |
|
369 |
// Field type (DATE)
|
370 |
if (((Datevalue) nft.getDatevalue()) != null) { |
371 |
String cadena = (String) chains.get(i); |
372 |
if (cadena.length() > 0 || cadena != null) { |
373 |
aux = cadena.replace(txsep, "");
|
374 |
} else {
|
375 |
aux = "";
|
376 |
} |
377 |
} |
378 |
postChain.add(aux); |
379 |
} |
380 |
return postChain;
|
381 |
|
382 |
} |
383 |
|
384 |
/**
|
385 |
* This method registers the listeners
|
386 |
*
|
387 |
* @param l
|
388 |
* listener
|
389 |
*/
|
390 |
@SuppressWarnings("unchecked") |
391 |
public void registerListener(ChangeListener l) { |
392 |
this.listeners.add(l);
|
393 |
} |
394 |
|
395 |
/**
|
396 |
* This method remove the listener registred
|
397 |
*
|
398 |
* @param l
|
399 |
* listener
|
400 |
*/
|
401 |
public void removeListener(ChangeListener l) { |
402 |
this.listeners.remove(l);
|
403 |
} |
404 |
|
405 |
/**
|
406 |
* This method removes all listeners
|
407 |
*/
|
408 |
public void removeAllListeners() { |
409 |
this.listeners.clear();
|
410 |
} |
411 |
|
412 |
/**
|
413 |
*
|
414 |
* @param evt
|
415 |
* event
|
416 |
*/
|
417 |
public void update(ChangeEvent evt) { |
418 |
|
419 |
for (int i = 0; i < listeners.size(); i++) { |
420 |
((ChangeListener) listeners.get(i)).stateChanged(evt);
|
421 |
} |
422 |
} |
423 |
|
424 |
/**
|
425 |
* Add message
|
426 |
*
|
427 |
* @param message
|
428 |
*/
|
429 |
public void update(String message) { |
430 |
ChangeEvent evt = new ChangeEvent(message); |
431 |
update(evt); |
432 |
} |
433 |
|
434 |
/**
|
435 |
* Set the row
|
436 |
*
|
437 |
* @param _row
|
438 |
*/
|
439 |
public void setRow(int _row) { |
440 |
row = _row; |
441 |
} |
442 |
|
443 |
/**
|
444 |
* This method loads the join separators attribute of one Addresselement
|
445 |
*
|
446 |
* @param ad
|
447 |
* @return with or without joinSeparators
|
448 |
*/
|
449 |
private boolean withJoinSeparators(Addresselement ad) { |
450 |
return ad.getFieldseparator().getJoinsep();
|
451 |
} |
452 |
|
453 |
/**
|
454 |
* This method deletes the first element of one substring
|
455 |
*
|
456 |
* @param chain
|
457 |
* initial string
|
458 |
* @return string
|
459 |
*/
|
460 |
private String deleteFirst(String chain) { |
461 |
String del = chain.substring(1); |
462 |
return del;
|
463 |
} |
464 |
|
465 |
/**
|
466 |
* This method gets the first position in the string of the separators
|
467 |
* group.
|
468 |
*
|
469 |
* @param separators
|
470 |
* separators characters list
|
471 |
* @param preChain
|
472 |
* initial string
|
473 |
* @return
|
474 |
*/
|
475 |
@SuppressWarnings("unchecked") |
476 |
private int calculatePosition(List separators, String preChain) { |
477 |
|
478 |
String separator;
|
479 |
int posTemp = -1; |
480 |
int posi = Integer.MAX_VALUE; |
481 |
for (int j = 0; j < separators.size(); j++) { |
482 |
separator = (String) separators.get(j);
|
483 |
posTemp = preChain.indexOf(separator); |
484 |
if (posTemp != -1 && posTemp < posi) { |
485 |
posi = posTemp; |
486 |
} |
487 |
posTemp = -1;
|
488 |
} |
489 |
if (posi > preChain.length()) {
|
490 |
posi = preChain.length(); |
491 |
} |
492 |
|
493 |
return posi;
|
494 |
} |
495 |
|
496 |
/**
|
497 |
* This method calculates the number of elements of one separators
|
498 |
*
|
499 |
* @param separators
|
500 |
* @param preChain
|
501 |
* @return number of elements
|
502 |
*/
|
503 |
|
504 |
@SuppressWarnings("unchecked") |
505 |
private int calculateSizeSep(List separators, String preChain) { |
506 |
|
507 |
String separator;
|
508 |
int posTemp = -1; |
509 |
int posi = Integer.MAX_VALUE; |
510 |
String sep = ""; |
511 |
for (int j = 0; j < separators.size(); j++) { |
512 |
separator = (String) separators.get(j);
|
513 |
posTemp = preChain.indexOf(separator); |
514 |
if (posTemp != -1 && posTemp < posi) { |
515 |
posi = posTemp; |
516 |
sep = separator; |
517 |
} |
518 |
posTemp = -1;
|
519 |
} |
520 |
|
521 |
return sep.length();
|
522 |
} |
523 |
|
524 |
/**
|
525 |
* This method loads the specifics separators of one Addresselement
|
526 |
*
|
527 |
* @param adrElem
|
528 |
* @return separators list
|
529 |
*/
|
530 |
@SuppressWarnings("unchecked") |
531 |
private List loadSpecificSeparators(Addresselement adrElem) { |
532 |
List separators = new ArrayList(); |
533 |
if (adrElem.getFieldseparator().getColonsep()) {
|
534 |
separators.add(",");
|
535 |
} |
536 |
if (adrElem.getFieldseparator().getSemicolonsep()) {
|
537 |
separators.add(";");
|
538 |
} |
539 |
if (adrElem.getFieldseparator().getTabsep()) {
|
540 |
separators.add("\t");
|
541 |
} |
542 |
if (adrElem.getFieldseparator().getSpacesep()) {
|
543 |
separators.add(" ");
|
544 |
} |
545 |
if (adrElem.getFieldseparator().getOthersep() != null |
546 |
&& adrElem.getFieldseparator().getOthersep() |
547 |
.compareToIgnoreCase("") != 0) { |
548 |
separators.add((String) adrElem.getFieldseparator().getOthersep());
|
549 |
} |
550 |
|
551 |
return separators;
|
552 |
} |
553 |
|
554 |
/**
|
555 |
* This method tests the numbers format
|
556 |
*
|
557 |
* @param str
|
558 |
* @param dec
|
559 |
* @param sep
|
560 |
* @return true if there aren't other characters
|
561 |
*/
|
562 |
private boolean test(String str, char dec, char sep) { |
563 |
String str2 = str.replaceAll("[0-9]", ""); |
564 |
str2 = str2.replaceAll("-", ""); |
565 |
|
566 |
String str3 = str2;
|
567 |
if (str2.indexOf(dec) >= 0) { |
568 |
int ind = str2.indexOf(String.valueOf(dec)); |
569 |
str3 = str2.substring(0, ind)
|
570 |
+ str2.substring(ind + 1, str2.length());
|
571 |
} |
572 |
String str4 = str3;
|
573 |
if (str3.indexOf(sep) >= 0) { |
574 |
int ind = str3.indexOf(String.valueOf(sep)); |
575 |
str4 = str3.substring(0, ind)
|
576 |
+ str3.substring(ind + 1, str3.length());
|
577 |
} |
578 |
|
579 |
return str4.length() == 0; |
580 |
} |
581 |
|
582 |
} |