Statistics
| Revision:

root / trunk / extensions / extNormalization / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 22823

History | View | Annotate | Download (14.3 KB)

1
/* gvSIG. Geographic Information System of the Valencian Government
2
 *
3
 * Copyright (C) 2007-2008 Infrastructures and Transports Department
4
 * of the Valencian Government (CIT)
5
 * 
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 * 
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 * 
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
19
 * MA  02110-1301, USA.
20
 * 
21
 */
22

    
23
/*
24
 * AUTHORS (In addition to CIT):
25
 * 2008 Prodevelop S.L. main development
26
 */
27

    
28
package org.gvsig.normalization.operations;
29

    
30
import java.text.DecimalFormat;
31
import java.text.DecimalFormatSymbols;
32
import java.util.ArrayList;
33
import java.util.List;
34
import java.util.Locale;
35

    
36
import javax.swing.event.ChangeEvent;
37
import javax.swing.event.ChangeListener;
38

    
39
import org.apache.log4j.Logger;
40
import org.gvsig.geocoding.patterns.normalization.Addresselement;
41
import org.gvsig.geocoding.patterns.normalization.Datevalue;
42
import org.gvsig.geocoding.patterns.normalization.Decimalvalue;
43
import org.gvsig.geocoding.patterns.normalization.Integervalue;
44
import org.gvsig.geocoding.patterns.normalization.Newfieldtype;
45
import org.gvsig.geocoding.patterns.normalization.Patternnormalization;
46
import org.gvsig.geocoding.patterns.normalization.Stringvalue;
47

    
48
/**
49
 * This class tokenizes strings
50
 * 
51
 * @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
52
 * @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
53
 * 
54
 */
55

    
56
public class NormAlgorithm {
57

    
58
        // ATTRIBUTES
59

    
60
        private static final Logger log = Logger.getLogger(NormAlgorithm.class);
61

    
62
        private Patternnormalization pat;
63

    
64
        private int numFields;
65

    
66
        private int row;
67

    
68
        private Addresselement[] aad;
69

    
70
        @SuppressWarnings("unchecked")
71
        private ArrayList listeners = new ArrayList();
72

    
73
        /**
         * Creates the algorithm for one normalization pattern and caches the
         * address elements of that pattern together with their count.
         * 
         * @param _pat
         *            Normalization pattern
         */
        public NormAlgorithm(Patternnormalization _pat) {
                this.pat = _pat;
                this.aad = _pat.getAddresselements().getAddresselement();
                this.numFields = this.aad.length;
        }
85

    
86
        // METHODS
87

    
88
        /**
89
         * This method cuts a chain in several parts and they are returned in a List
90
         * 
91
         * @param _chain
92
         *            strings
93
         * @return list with the strings tokenized
94
         */
95
        @SuppressWarnings("unchecked")
96
        public List splitChain(String _chain) {
97
                String preChain = _chain;
98
                String postChain = "";
99
                List subStrings = new ArrayList();
100

    
101
                // EXCEPTIONAL CASES
102
                if(_chain == null){
103
                        return subStrings;
104
                }
105
                
106
                else if (_chain.compareToIgnoreCase("") == 0) {
107
                        subStrings.add(_chain);
108
                        return subStrings;
109
                }
110
                
111

    
112
                // NORMAL CASE
113
                else {
114
                        int fw = 0;
115
                        int init = 0;
116
                        String subChain = "";
117

    
118
                        for (int i = 0; i < numFields; i++) {
119

    
120
                                if (preChain.length() == 0) {
121
                                        return subStrings;
122
                                } else {
123

    
124
                                        fw = aad[i].getFieldwidth();
125
                                        // Cut chain with fixed width
126
                                        if (fw > 0 && preChain.length() > fw) {
127
                                                subChain = preChain.substring(init, fw);
128
                                                subStrings.add(subChain);
129
                                                postChain = preChain.substring(fw);
130
                                                preChain = postChain;
131

    
132
                                        } else if (fw > 0 && preChain.length() <= fw) {
133
                                                subStrings.add(preChain);
134
                                                return subStrings;
135
                                        }
136
                                        // Cut chain with separators
137
                                        else {
138
                                                // Load specific separators
139
                                                List separators = loadSpecificSeparators(aad[i]);
140
                                                boolean join = withJoinSeparators(aad[i]);
141
                                                // Search the first delimiter in the chain
142
                                                int posi = calculatePosition(separators, preChain);
143
                                                int tamSep = calculateSizeSep(separators, preChain);
144
                                                // Firsts elements
145
                                                if (i < numFields - 1) {
146

    
147
                                                        if (join) {
148
                                                                while (posi == 0) {
149
                                                                        preChain = deleteFirst(preChain);
150
                                                                        posi = calculatePosition(separators,
151
                                                                                        preChain);
152
                                                                        if(preChain.length() == 0){
153
                                                                                break;
154
                                                                        }
155
                                                                }
156
                                                                subChain = preChain.substring(0, posi);
157
                                                                try {
158
                                                                        postChain = preChain.substring(posi
159
                                                                                        + tamSep);
160
                                                                } catch (Exception e) {
161
                                                                        postChain = "";
162
                                                                }
163
                                                                subStrings.add(subChain);
164
                                                                preChain = postChain;
165
                                                        } else {
166
                                                                subChain = preChain.substring(0, posi);
167
                                                                postChain = preChain.substring(posi + tamSep);
168
                                                                subStrings.add(subChain);
169
                                                                preChain = postChain;
170
                                                        }
171

    
172
                                                }
173
                                                // Last element
174
                                                else {
175
                                                        subStrings.add(preChain);
176
                                                }
177
                                        }
178
                                }
179
                        }
180
                }
181
                return subStrings;
182
        }
183

    
184
        /**
185
         * This method cuts a chain in several parts from separators
186
         * 
187
         * @param chain
188
         *            string
189
         * @param fields
190
         *            fields number
191
         * @param separators
192
         *            array of characters
193
         * @param joinDelimiters
194
         *            with or without joinDelimiters
195
         * @return list with the strings tokenized
196
         */
197
        @SuppressWarnings("unchecked")
198
        public static List splitChainBySeparators(String chain, int fields,
199
                        String[] separators, boolean joinDelimiters) {
200

    
201
                List subStrings = new ArrayList();
202
                int posTemp = -1;
203
                String separator;
204
                String chain2 = chain;
205
                int campos = fields;
206

    
207
                // EXCEPTIONAL CASES
208
                if (chain.compareToIgnoreCase("") == 0 || campos == 0
209
                                || separators.length < 1) {
210
                        subStrings.add(chain);
211
                        return subStrings;
212
                }
213

    
214
                // NORMAL CASE
215
                else {
216

    
217
                        // Only (parts-1) loops
218
                        for (int i = 0; i < (campos - 1); i++) {
219
                                int posi = Integer.MAX_VALUE;
220
                                String firstChain;
221
                                for (int j = 0; j < separators.length; j++) {
222
                                        separator = separators[j];
223
                                        posTemp = chain2.indexOf(separator);
224
                                        if (posTemp != -1 && posTemp < posi) {
225
                                                posi = posTemp;
226
                                        }
227
                                        posTemp = -1;
228
                                }
229
                                if (posi == 0 && joinDelimiters) {
230
                                        campos++;
231
                                        chain2 = chain2.substring(posi + 1);
232
                                } else {
233
                                        firstChain = chain2.substring(0, posi);
234
                                        chain2 = chain2.substring(posi + 1);
235
                                        subStrings.add(firstChain);
236
                                        // In the last loop add the first chain and the rest of
237
                                        // chain
238
                                        if (i == (campos - 2)) {
239
                                                subStrings.add(chain2);
240
                                        }
241
                                }
242
                        }
243
                        return subStrings;
244
                }
245
        }
246

    
247
        /**
248
         * This method cuts a chain in several parts from fixed width
249
         * 
250
         * @param chain
251
         *            string
252
         * @param fieldWidth
253
         *            array with fields widths
254
         * @return list with the strings tokenized
255
         */
256
        @SuppressWarnings("unchecked")
257
        public static List splitChainByFixedWidth(String chain, int[] fieldWidth) {
258

    
259
                List subStrings = new ArrayList();
260
                int elements = fieldWidth.length;
261
                String subChain;
262
                int inicio = 0;
263
                int fin = 0;
264

    
265
                // EXCEPTIONAL CASES
266
                if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) {
267
                        subStrings.add(chain);
268
                        return subStrings;
269
                }
270

    
271
                // NORMAL CASE
272
                else {
273
                        for (int i = 0; i < elements; i++) {
274
                                fin = fin + fieldWidth[i];
275
                                subChain = chain.substring(inicio, fin);
276
                                subStrings.add(subChain);
277
                                inicio = fin;
278
                        }
279
                        return subStrings;
280
                }
281
        }
282

    
283
        /**
284
         * This method filters the split chains with the in-separators
285
         * 
286
         * @param chains
287
         *            strings of the one row
288
         * @return strings filtered by type
289
         */
290
        @SuppressWarnings("unchecked")
291
        public List filterSplitChains(List chains) {
292

    
293
                DecimalFormat numForm = (DecimalFormat) DecimalFormat
294
                                .getInstance(Locale.getDefault());
295
                DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
296

    
297
                Newfieldtype nft = null;
298

    
299
                String decsep;
300
                char cdecsep;
301
                String thosep;
302
                char cthosep;
303
                String txsep;
304

    
305
                List postChain = new ArrayList();
306
                String aux = "";
307

    
308
                for (int i = 0; i < numFields; i++) {
309

    
310
                        nft = aad[i].getNewfieldtype();
311

    
312
                        decsep = aad[i].getInfieldseparators().getDecimalseparator()
313
                                        .getValue().trim();
314
                        cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep
315
                                        .charAt(0);
316
                        thosep = aad[i].getInfieldseparators().getThousandseparator()
317
                                        .getValue().trim();
318
                        cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep
319
                                        .charAt(0);
320

    
321
                        txsep = aad[i].getInfieldseparators().getTextseparator().getValue()
322
                                        .trim();
323

    
324
                        if (txsep.compareToIgnoreCase("\"") == 0) {
325
                                txsep = "\"";
326
                        }
327

    
328
                        simb.setDecimalSeparator(cdecsep);
329
                        try {
330
                                simb.setGroupingSeparator(cthosep);
331
                        } catch (RuntimeException e1) {
332
                                log.error("Error setting the group separator", e1);
333
                        }
334
                        // Fill fields empties
335
                        if (chains.size() < numFields) {
336

    
337
                                for (int j = chains.size(); j < numFields; j++) {
338
                                        chains.add(j, "");
339
                                }
340
                        }
341
                
342
                        if (((Integervalue) nft.getIntegervalue()) != null
343
                                        || ((Decimalvalue) nft.getDecimalvalue()) != null) {
344
                                numForm.setDecimalFormatSymbols(simb);
345
                                try {
346
                                        String cadena = (String) chains.get(i);
347
                                        int num = cadena.length();
348
                                        boolean comproba = test(cadena, cdecsep, cthosep);
349
                                        if (num > 0 && comproba) {
350
                                                aux = (numForm.parse(cadena)).toString().trim();
351
                                        } else {
352
                                                aux = "";
353
                                        }
354
                                } catch (Exception e) {
355
                                        aux = "";
356
                                }
357
                        }
358

    
359
                        // Field type (VARCHAR)
360
                        if (((Stringvalue) nft.getStringvalue()) != null) {
361
                                String cadena = (String) chains.get(i);
362
                                if (cadena != null) {
363
                                        aux = cadena.replace(txsep, "");
364
                                } else {
365
                                        aux = "";
366
                                }
367
                        }
368

    
369
                        // Field type (DATE)
370
                        if (((Datevalue) nft.getDatevalue()) != null) {
371
                                String cadena = (String) chains.get(i);
372
                                if (cadena.length() > 0 || cadena != null) {
373
                                        aux = cadena.replace(txsep, "");
374
                                } else {
375
                                        aux = "";
376
                                }
377
                        }
378
                        postChain.add(aux);
379
                }
380
                return postChain;
381

    
382
        }
383

    
384
        /**
385
         * This method registers the listeners
386
         * 
387
         * @param l
388
         *            listener
389
         */
390
        @SuppressWarnings("unchecked")
391
        public void registerListener(ChangeListener l) {
392
                this.listeners.add(l);
393
        }
394

    
395
        /**
396
         * This method remove the listener registred
397
         * 
398
         * @param l
399
         *            listener
400
         */
401
        public void removeListener(ChangeListener l) {
402
                this.listeners.remove(l);
403
        }
404

    
405
        /**
406
         * This method removes all listeners
407
         */
408
        public void removeAllListeners() {
409
                this.listeners.clear();
410
        }
411

    
412
        /**
413
         * 
414
         * @param evt
415
         *            event
416
         */
417
        public void update(ChangeEvent evt) {
418

    
419
                for (int i = 0; i < listeners.size(); i++) {
420
                        ((ChangeListener) listeners.get(i)).stateChanged(evt);
421
                }
422
        }
423

    
424
        /**
425
         * Add message
426
         * 
427
         * @param message
428
         */
429
        public void update(String message) {
430
                ChangeEvent evt = new ChangeEvent(message);
431
                update(evt);
432
        }
433

    
434
        /**
435
         * Set the row
436
         * 
437
         * @param _row
438
         */
439
        public void setRow(int _row) {
440
                row = _row;
441
        }
442

    
443
        /**
444
         * This method loads the join separators attribute of one Addresselement
445
         * 
446
         * @param ad
447
         * @return with or without joinSeparators
448
         */
449
        private boolean withJoinSeparators(Addresselement ad) {
450
                return ad.getFieldseparator().getJoinsep();
451
        }
452

    
453
        /**
454
         * This method deletes the first element of one substring
455
         * 
456
         * @param chain
457
         *            initial string
458
         * @return string
459
         */
460
        private String deleteFirst(String chain) {
461
                String del = chain.substring(1);
462
                return del;
463
        }
464

    
465
        /**
466
         * This method gets the first position in the string of the separators
467
         * group.
468
         * 
469
         * @param separators
470
         *            separators characters list
471
         * @param preChain
472
         *            initial string
473
         * @return
474
         */
475
        @SuppressWarnings("unchecked")
476
        private int calculatePosition(List separators, String preChain) {
477

    
478
                String separator;
479
                int posTemp = -1;
480
                int posi = Integer.MAX_VALUE;
481
                for (int j = 0; j < separators.size(); j++) {
482
                        separator = (String) separators.get(j);
483
                        posTemp = preChain.indexOf(separator);
484
                        if (posTemp != -1 && posTemp < posi) {
485
                                posi = posTemp;
486
                        }
487
                        posTemp = -1;
488
                }
489
                if (posi > preChain.length()) {
490
                        posi = preChain.length();
491
                }
492

    
493
                return posi;
494
        }
495

    
496
        /**
497
         * This method calculates the number of elements of one separators
498
         * 
499
         * @param separators
500
         * @param preChain
501
         * @return number of elements
502
         */
503

    
504
        @SuppressWarnings("unchecked")
505
        private int calculateSizeSep(List separators, String preChain) {
506

    
507
                String separator;
508
                int posTemp = -1;
509
                int posi = Integer.MAX_VALUE;
510
                String sep = "";
511
                for (int j = 0; j < separators.size(); j++) {
512
                        separator = (String) separators.get(j);
513
                        posTemp = preChain.indexOf(separator);
514
                        if (posTemp != -1 && posTemp < posi) {
515
                                posi = posTemp;
516
                                sep = separator;
517
                        }
518
                        posTemp = -1;
519
                }
520

    
521
                return sep.length();
522
        }
523

    
524
        /**
525
         * This method loads the specifics separators of one Addresselement
526
         * 
527
         * @param adrElem
528
         * @return separators list
529
         */
530
        @SuppressWarnings("unchecked")
531
        private List loadSpecificSeparators(Addresselement adrElem) {
532
                List separators = new ArrayList();
533
                if (adrElem.getFieldseparator().getColonsep()) {
534
                        separators.add(",");
535
                }
536
                if (adrElem.getFieldseparator().getSemicolonsep()) {
537
                        separators.add(";");
538
                }
539
                if (adrElem.getFieldseparator().getTabsep()) {
540
                        separators.add("\t");
541
                }
542
                if (adrElem.getFieldseparator().getSpacesep()) {
543
                        separators.add(" ");
544
                }
545
                if (adrElem.getFieldseparator().getOthersep() != null
546
                                && adrElem.getFieldseparator().getOthersep()
547
                                                .compareToIgnoreCase("") != 0) {
548
                        separators.add((String) adrElem.getFieldseparator().getOthersep());
549
                }
550

    
551
                return separators;
552
        }
553

    
554
        /**
555
         * This method tests the numbers format
556
         * 
557
         * @param str
558
         * @param dec
559
         * @param sep
560
         * @return true if there aren't other characters
561
         */
562
        private boolean test(String str, char dec, char sep) {
563
                String str2 = str.replaceAll("[0-9]", "");
564
                str2 = str2.replaceAll("-", "");
565

    
566
                String str3 = str2;
567
                if (str2.indexOf(dec) >= 0) {
568
                        int ind = str2.indexOf(String.valueOf(dec));
569
                        str3 = str2.substring(0, ind)
570
                                        + str2.substring(ind + 1, str2.length());
571
                }
572
                String str4 = str3;
573
                if (str3.indexOf(sep) >= 0) {
574
                        int ind = str3.indexOf(String.valueOf(sep));
575
                        str4 = str3.substring(0, ind)
576
                                        + str3.substring(ind + 1, str3.length());
577
                }
578

    
579
                return str4.length() == 0;
580
        }
581

    
582
}