Statistics
| Revision:

root / trunk / libraries / libGeocoding / src / org / gvsig / normalization / operations / NormAlgorithm.java @ 26245

History | View | Annotate | Download (14.2 KB)

1
/* gvSIG. Geographic Information System of the Valencian Government
2
 *
3
 * Copyright (C) 2007-2008 Infrastructures and Transports Department
4
 * of the Valencian Government (CIT)
5
 * 
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 * 
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 * 
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
19
 * MA  02110-1301, USA.
20
 * 
21
 */
22

    
23
/*
24
 * AUTHORS (In addition to CIT):
25
 * 2008 Prodevelop S.L. main development
26
 */
27

    
28
package org.gvsig.normalization.operations;
29

    
30
import java.text.DecimalFormat;
31
import java.text.DecimalFormatSymbols;
32
import java.util.ArrayList;
33
import java.util.List;
34
import java.util.Locale;
35

    
36
import javax.swing.event.ChangeEvent;
37
import javax.swing.event.ChangeListener;
38

    
39
import org.apache.log4j.Logger;
40
import org.gvsig.normalization.patterns.Datevalue;
41
import org.gvsig.normalization.patterns.Decimalvalue;
42
import org.gvsig.normalization.patterns.Element;
43
import org.gvsig.normalization.patterns.Fieldtype;
44
import org.gvsig.normalization.patterns.Integervalue;
45
import org.gvsig.normalization.patterns.Patternnormalization;
46
import org.gvsig.normalization.patterns.Stringvalue;
47

    
48
/**
 * This class tokenizes strings according to a normalization pattern.
 * 
 * @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
 * @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
 * 
 */
55

    
56
public class NormAlgorithm {
57

    
58
        // ATTRIBUTES

        private static final Logger log = Logger.getLogger(NormAlgorithm.class);

        /** Normalization pattern handed to the constructor. */
        private final Patternnormalization pat;

        /** Number of elements of the pattern (length of {@link #elements}). */
        private final int numFields;

        /** Row number stored by {@link #setRow(int)}; not read inside this class. */
        @SuppressWarnings("unused")
        private int row;

        /** Elements of the pattern, in the order they are applied. */
        private final Element[] elements;

        /** Listeners notified by the update methods. */
        private final List<ChangeListener> listeners = new ArrayList<ChangeListener>();

        /**
         * Builder
         * 
         * @param _pat
         *            Normalization pattern; its elements are read once, here
         */
        public NormAlgorithm(Patternnormalization _pat) {
                this.pat = _pat;
                this.elements = _pat.getArrayElements();
                this.numFields = this.elements.length;
        }
85

    
86
        // METHODS
87

    
88
        /**
89
         * This method cuts a chain in several parts and they are returned in a List
90
         * 
91
         * @param _chain
92
         *            strings
93
         * @return list with the strings tokenized
94
         */
95
        @SuppressWarnings("unchecked")
96
        public List splitChain(String _chain) {
97
                String preChain = _chain;
98
                String postChain = "";
99
                List subStrings = new ArrayList();
100

    
101
                // EXCEPTIONAL CASES
102
                if (_chain == null) {
103
                        return subStrings;
104
                }
105

    
106
                else if (_chain.compareToIgnoreCase("") == 0) {
107
                        subStrings.add(_chain);
108
                        return subStrings;
109
                }
110

    
111
                // NORMAL CASE
112
                else {
113
                        int fw = 0;
114
                        int init = 0;
115
                        String subChain = "";
116

    
117
                        for (int i = 0; i < numFields; i++) {
118

    
119
                                if (preChain.length() == 0) {
120
                                        return subStrings;
121
                                } else {
122

    
123
                                        fw = elements[i].getFieldwidth();
124
                                        // Cut chain with fixed width
125
                                        if (fw > 0 && preChain.length() > fw) {
126
                                                subChain = preChain.substring(init, fw);
127
                                                subStrings.add(subChain);
128
                                                postChain = preChain.substring(fw);
129
                                                preChain = postChain;
130

    
131
                                        } else if (fw > 0 && preChain.length() <= fw) {
132
                                                subStrings.add(preChain);
133
                                                return subStrings;
134
                                        }
135
                                        // Cut chain with separators
136
                                        else {
137
                                                // Load specific separators
138
                                                List separators = loadSpecificSeparators(elements[i]);
139
                                                boolean join = withJoinSeparators(elements[i]);
140
                                                // Search the first delimiter in the chain
141
                                                int posi = calculatePosition(separators, preChain);
142
                                                int tamSep = calculateSizeSep(separators, preChain);
143
                                                // Firsts elements
144
                                                if (i < numFields - 1) {
145

    
146
                                                        if (join) {
147
                                                                while (posi == 0) {
148
                                                                        preChain = deleteFirst(preChain);
149
                                                                        posi = calculatePosition(separators,
150
                                                                                        preChain);
151
                                                                        if (preChain.length() == 0) {
152
                                                                                break;
153
                                                                        }
154
                                                                }
155
                                                                subChain = preChain.substring(0, posi);
156
                                                                try {
157
                                                                        postChain = preChain.substring(posi
158
                                                                                        + tamSep);
159
                                                                } catch (Exception e) {
160
                                                                        postChain = "";
161
                                                                }
162
                                                                subStrings.add(subChain);
163
                                                                preChain = postChain;
164
                                                        } else {
165
                                                                subChain = preChain.substring(0, posi);
166
                                                                postChain = preChain.substring(posi + tamSep);
167
                                                                subStrings.add(subChain);
168
                                                                preChain = postChain;
169
                                                        }
170

    
171
                                                }
172
                                                // Last element
173
                                                else {
174
                                                        subStrings.add(preChain);
175
                                                }
176
                                        }
177
                                }
178
                        }
179
                }
180
                return subStrings;
181
        }
182

    
183
        /**
184
         * This method cuts a chain in several parts from separators
185
         * 
186
         * @param chain
187
         *            string
188
         * @param fields
189
         *            fields number
190
         * @param separators
191
         *            array of characters
192
         * @param joinDelimiters
193
         *            with or without joinDelimiters
194
         * @return list with the strings tokenized
195
         */
196
        @SuppressWarnings("unchecked")
197
        public static List splitChainBySeparators(String chain, int fields,
198
                        String[] separators, boolean joinDelimiters) {
199

    
200
                List subStrings = new ArrayList();
201
                int posTemp = -1;
202
                String separator;
203
                String chain2 = chain;
204
                int campos = fields;
205

    
206
                // EXCEPTIONAL CASES
207
                if (chain.compareToIgnoreCase("") == 0 || campos == 0
208
                                || separators.length < 1) {
209
                        subStrings.add(chain);
210
                        return subStrings;
211
                }
212

    
213
                // NORMAL CASE
214
                else {
215

    
216
                        // Only (parts-1) loops
217
                        for (int i = 0; i < (campos - 1); i++) {
218
                                int posi = Integer.MAX_VALUE;
219
                                String firstChain;
220
                                for (int j = 0; j < separators.length; j++) {
221
                                        separator = separators[j];
222
                                        posTemp = chain2.indexOf(separator);
223
                                        if (posTemp != -1 && posTemp < posi) {
224
                                                posi = posTemp;
225
                                        }
226
                                        posTemp = -1;
227
                                }
228
                                if (posi == 0 && joinDelimiters) {
229
                                        campos++;
230
                                        chain2 = chain2.substring(posi + 1);
231
                                } else {
232
                                        firstChain = chain2.substring(0, posi);
233
                                        chain2 = chain2.substring(posi + 1);
234
                                        subStrings.add(firstChain);
235
                                        // In the last loop add the first chain and the rest of
236
                                        // chain
237
                                        if (i == (campos - 2)) {
238
                                                subStrings.add(chain2);
239
                                        }
240
                                }
241
                        }
242
                        return subStrings;
243
                }
244
        }
245

    
246
        /**
247
         * This method cuts a chain in several parts from fixed width
248
         * 
249
         * @param chain
250
         *            string
251
         * @param fieldWidth
252
         *            array with fields widths
253
         * @return list with the strings tokenized
254
         */
255
        @SuppressWarnings("unchecked")
256
        public static List splitChainByFixedWidth(String chain, int[] fieldWidth) {
257

    
258
                List subStrings = new ArrayList();
259
                int elements = fieldWidth.length;
260
                String subChain;
261
                int inicio = 0;
262
                int fin = 0;
263

    
264
                // EXCEPTIONAL CASES
265
                if (chain.compareToIgnoreCase("") == 0 || fieldWidth.length < 1) {
266
                        subStrings.add(chain);
267
                        return subStrings;
268
                }
269

    
270
                // NORMAL CASE
271
                else {
272
                        for (int i = 0; i < elements; i++) {
273
                                fin = fin + fieldWidth[i];
274
                                subChain = chain.substring(inicio, fin);
275
                                subStrings.add(subChain);
276
                                inicio = fin;
277
                        }
278
                        return subStrings;
279
                }
280
        }
281

    
282
        /**
283
         * This method filters the split chains with the in-separators
284
         * 
285
         * @param chains
286
         *            strings of the one row
287
         * @return strings filtered by type
288
         */
289
        @SuppressWarnings("unchecked")
290
        public List filterSplitChains(List chains) {
291

    
292
                DecimalFormat numForm = (DecimalFormat) DecimalFormat
293
                                .getInstance(Locale.getDefault());
294
                DecimalFormatSymbols simb = numForm.getDecimalFormatSymbols();
295

    
296
                Fieldtype nft = null;
297

    
298
                String decsep;
299
                char cdecsep;
300
                String thosep;
301
                char cthosep;
302
                String txsep;
303

    
304
                List postChain = new ArrayList();
305
                String aux = "";
306

    
307
                for (int i = 0; i < numFields; i++) {
308

    
309
                        nft = elements[i].getFieldtype();
310

    
311
                        decsep = elements[i].getInfieldseparators().getDecimalseparator().trim();
312
                        cdecsep = decsep.compareTo("") == 0 ? (char) 0x20 : decsep
313
                                        .charAt(0);
314
                        thosep = elements[i].getInfieldseparators().getThousandseparator()
315
                                        .trim();
316
                        cthosep = thosep.compareTo("") == 0 ? (char) 0x20 : thosep
317
                                        .charAt(0);
318

    
319
                        txsep = elements[i].getInfieldseparators().getTextseparator().trim();
320

    
321
                        if (txsep.compareToIgnoreCase("\"") == 0) {
322
                                txsep = "\"";
323
                        }
324

    
325
                        simb.setDecimalSeparator(cdecsep);
326
                        try {
327
                                simb.setGroupingSeparator(cthosep);
328
                        } catch (RuntimeException e1) {
329
                                log.error("Error setting the group separator", e1);
330
                        }
331
                        // Fill fields empties
332
                        if (chains.size() < numFields) {
333

    
334
                                for (int j = chains.size(); j < numFields; j++) {
335
                                        chains.add(j, "");
336
                                }
337
                        }
338

    
339
                        if (((Integervalue) nft.getIntegervalue()) != null
340
                                        || ((Decimalvalue) nft.getDecimalvalue()) != null) {
341
                                numForm.setDecimalFormatSymbols(simb);
342
                                try {
343
                                        String cadena = ((String) chains.get(i)).trim();
344
                                        int num = cadena.length();
345
                                        boolean comproba = test(cadena, cdecsep, cthosep);
346
                                        if (num > 0 && comproba) {
347
                                                aux = (numForm.parse(cadena)).toString().trim();
348
                                        } else {
349
                                                aux = "";
350
                                        }
351
                                } catch (Exception e) {
352
                                        aux = "";
353
                                }
354
                        }
355

    
356
                        // Field type (VARCHAR)
357
                        if (((Stringvalue) nft.getStringvalue()) != null) {
358
                                String cadena = (String) chains.get(i);
359
                                if (cadena != null) {
360
                                        aux = cadena.replace(txsep, "");
361
                                } else {
362
                                        aux = "";
363
                                }
364
                        }
365

    
366
                        // Field type (DATE)
367
                        if (((Datevalue) nft.getDatevalue()) != null) {
368
                                String cadena = (String) chains.get(i);
369
                                if (cadena.length() > 0 || cadena != null) {
370
                                        aux = cadena.replace(txsep, "");
371
                                } else {
372
                                        aux = "";
373
                                }
374
                        }
375
                        postChain.add(aux);
376
                }
377
                return postChain;
378

    
379
        }
380

    
381
        /**
382
         * This method registers the listeners
383
         * 
384
         * @param l
385
         *            listener
386
         */
387
        @SuppressWarnings("unchecked")
388
        public void registerListener(ChangeListener l) {
389
                this.listeners.add(l);
390
        }
391

    
392
        /**
393
         * This method remove the listener registred
394
         * 
395
         * @param l
396
         *            listener
397
         */
398
        public void removeListener(ChangeListener l) {
399
                this.listeners.remove(l);
400
        }
401

    
402
        /**
403
         * This method removes all listeners
404
         */
405
        public void removeAllListeners() {
406
                this.listeners.clear();
407
        }
408

    
409
        /**
410
         * 
411
         * @param evt
412
         *            event
413
         */
414
        public void update(ChangeEvent evt) {
415

    
416
                for (int i = 0; i < listeners.size(); i++) {
417
                        ((ChangeListener) listeners.get(i)).stateChanged(evt);
418
                }
419
        }
420

    
421
        /**
422
         * Add message
423
         * 
424
         * @param message
425
         */
426
        public void update(String message) {
427
                ChangeEvent evt = new ChangeEvent(message);
428
                update(evt);
429
        }
430

    
431
        /**
432
         * Set the row
433
         * 
434
         * @param _row
435
         */
436
        public void setRow(int _row) {
437
                row = _row;
438
        }
439

    
440
        /**
441
         * This method loads the join separators attribute of one Element
442
         * 
443
         * @param ad
444
         * @return with or without joinSeparators
445
         */
446
        private boolean withJoinSeparators(Element ad) {
447
                return ad.getFieldseparator().getJoinsep();
448
        }
449

    
450
        /**
451
         * This method deletes the first element of one substring
452
         * 
453
         * @param chain
454
         *            initial string
455
         * @return string
456
         */
457
        private String deleteFirst(String chain) {
458
                String del = chain.substring(1);
459
                return del;
460
        }
461

    
462
        /**
463
         * This method gets the first position in the string of the separators
464
         * group.
465
         * 
466
         * @param separators
467
         *            separators characters list
468
         * @param preChain
469
         *            initial string
470
         * @return
471
         */
472
        @SuppressWarnings("unchecked")
473
        private int calculatePosition(List separators, String preChain) {
474

    
475
                String separator;
476
                int posTemp = -1;
477
                int posi = Integer.MAX_VALUE;
478
                for (int j = 0; j < separators.size(); j++) {
479
                        separator = (String) separators.get(j);
480
                        posTemp = preChain.indexOf(separator);
481
                        if (posTemp != -1 && posTemp < posi) {
482
                                posi = posTemp;
483
                        }
484
                        posTemp = -1;
485
                }
486
                if (posi > preChain.length()) {
487
                        posi = preChain.length();
488
                }
489

    
490
                return posi;
491
        }
492

    
493
        /**
494
         * This method calculates the number of elements of one separators
495
         * 
496
         * @param separators
497
         * @param preChain
498
         * @return number of elements
499
         */
500

    
501
        @SuppressWarnings("unchecked")
502
        private int calculateSizeSep(List separators, String preChain) {
503

    
504
                String separator;
505
                int posTemp = -1;
506
                int posi = Integer.MAX_VALUE;
507
                String sep = "";
508
                for (int j = 0; j < separators.size(); j++) {
509
                        separator = (String) separators.get(j);
510
                        posTemp = preChain.indexOf(separator);
511
                        if (posTemp != -1 && posTemp < posi) {
512
                                posi = posTemp;
513
                                sep = separator;
514
                        }
515
                        posTemp = -1;
516
                }
517

    
518
                return sep.length();
519
        }
520

    
521
        /**
522
         * This method loads the specifics separators of one Element
523
         * 
524
         * @param adrElem
525
         * @return separators list
526
         */
527
        @SuppressWarnings("unchecked")
528
        private List loadSpecificSeparators(Element adrElem) {
529
                List separators = new ArrayList();
530
                if (adrElem.getFieldseparator().getColonsep()) {
531
                        separators.add(",");
532
                }
533
                if (adrElem.getFieldseparator().getSemicolonsep()) {
534
                        separators.add(";");
535
                }
536
                if (adrElem.getFieldseparator().getTabsep()) {
537
                        separators.add("\t");
538
                }
539
                if (adrElem.getFieldseparator().getSpacesep()) {
540
                        separators.add(" ");
541
                }
542
                if (adrElem.getFieldseparator().getOthersep() != null
543
                                && adrElem.getFieldseparator().getOthersep()
544
                                                .compareToIgnoreCase("") != 0) {
545
                        separators.add((String) adrElem.getFieldseparator().getOthersep());
546
                }
547

    
548
                return separators;
549
        }
550

    
551
        /**
552
         * This method tests the numbers format
553
         * 
554
         * @param str
555
         * @param dec
556
         * @param sep
557
         * @return true if there aren't other characters
558
         */
559
        private boolean test(String str, char dec, char sep) {
560
                String str2 = str.replaceAll("[0-9]", "");
561
                str2 = str2.replaceAll("-", "");
562

    
563
                String str3 = str2;
564
                if (str2.indexOf(dec) >= 0) {
565
                        int ind = str2.indexOf(String.valueOf(dec));
566
                        str3 = str2.substring(0, ind)
567
                                        + str2.substring(ind + 1, str2.length());
568
                }
569
                String str4 = str3;
570
                if (str3.indexOf(sep) >= 0) {
571
                        int ind = str3.indexOf(String.valueOf(sep));
572
                        str4 = str3.substring(0, ind)
573
                                        + str3.substring(ind + 1, str3.length());
574
                }
575

    
576
                return str4.length() == 0;
577
        }
578

    
579
}