Statistics
| Revision:

root / trunk / libraries / libGeocoding / src-filter / org / gvsig / data / vectorial / filter / PropertyDistanceImpl.java @ 23019

History | View | Annotate | Download (5.97 KB)

1
/* gvSIG. Geographic Information System of the Valencian Government
2
 *
3
 * Copyright (C) 2007-2008 Infrastructures and Transports Department
4
 * of the Valencian Government (CIT)
5
 * 
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 * 
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 * 
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 
19
 * MA  02110-1301, USA.
20
 * 
21
 */
22

    
23
/*
24
 * AUTHORS (In addition to CIT):
25
 * 2008 Prodevelop S.L  main development
26
 */
27

    
28
package org.gvsig.data.vectorial.filter;
29

    
30
import java.util.ArrayList;
31
import java.util.List;
32

    
33
import org.opengis.filter.Filter;
34
import org.opengis.filter.FilterVisitor;
35
import org.opengis.filter.expression.Expression;
36

    
37
/**
38
 * class
39
 * 
40
 * 
41
 * @author <a href="mailto:jsanz@prodevelop.es"> Jorge Gaspar Sanz Salinas</a>
42
 * @author <a href="mailto:vsanjaime@prodevelop.es"> Vicente Sanjaime Calvet</a>
43
 * 
44
 */
45

    
46
public class PropertyDistanceImpl implements PropertyDistance {
47

    
48
    private static final String REGEX_TOKENIZER = "\\s";
49
    private Expression attribute;
50
    private String pattern;
51
    private float threshold;
52

    
53
    public PropertyDistanceImpl(Expression arg0, Expression arg1,
54
            float threshold) {
55
        this.attribute = arg0;
56
        this.pattern = arg1.toString();
57
        this.threshold = threshold;
58
    }
59

    
60
    public PropertyDistanceImpl(Expression arg0, String arg1, float threshold) {
61
        this.attribute = arg0;
62
        this.pattern = arg1;
63
        this.threshold = threshold;
64
    }
65

    
66
    public Object accept(FilterVisitor visitor, Object extraData) {
67
        return visitor.visitNullFilter(extraData);
68
    }
69

    
70
    /**
71
     * Evaluate the text distance between a feature attribute value and
72
     */
73
    @SuppressWarnings("unchecked")
74
    public boolean evaluate(Object feature) {
75

    
76
        if (attribute == null) {
77
            return false;
78
        }
79

    
80
        Object value = attribute.evaluate(feature);
81

    
82
        if (null == value) {
83
            return false;
84
        }
85

    
86
        String[] valueChains = value.toString().split(REGEX_TOKENIZER);
87
        String[] pattChains = this.pattern.split(REGEX_TOKENIZER);
88

    
89
        DistanceFilterFactory2Impl fact = new DistanceFilterFactory2Impl();
90

    
91
        /**
92
         * Really evaluate the filter when we have just one string per input
93
         */
94
        if (valueChains.length == 1 && pattChains.length == 1) {
95
            int dist = getLevenshteinDistance(valueChains[0], pattChains[0]);
96
            double distNorm = 1.0 - dist / (pattern.length() * 1.0);
97
            return distNorm >= threshold;
98
        } else {
99
            /**
100
             * Otherwise we will iterate to assure all patters appear in one of
101
             * the value chains
102
             */
103
            Filter filtAND;
104
            List listAND = new ArrayList();
105
            for (int i = 0; i < pattChains.length; i++) {
106
                String patt = pattChains[i];
107
                Filter filtOR;
108
                List listOR = new ArrayList();
109
                for (int j = 0; j < valueChains.length; j++) {
110
                    Expression exp2 = fact.literal(valueChains[j]);
111
                    PropertyDistance dist = fact.distance(exp2, patt,
112
                            this.threshold);
113
                    listOR.add(dist);
114
                }
115
                filtOR = fact.or(listOR);
116
                listAND.add(filtOR);
117
            }
118
            filtAND = fact.and(listAND);
119
            return filtAND.evaluate(null);
120
        }
121
    }
122

    
123
    private int getLevenshteinDistance(String s, String t) {
124
        if (s == null || t == null) {
125
            throw new IllegalArgumentException("Strings must not be null");
126
        }
127

    
128
        /*
129
         * The difference between this impl. and the previous is that, rather
130
         * than creating and retaining a matrix of size s.length()+1 by
131
         * t.length()+1, we maintain two single-dimensional arrays of length
132
         * s.length()+1. The first, d, is the 'current working' distance array
133
         * that maintains the newest distance cost counts as we iterate through
134
         * the characters of String s. Each time we increment the index of
135
         * String t we are comparing, d is copied to p, the second int[]. Doing
136
         * so allows us to retain the previous cost counts as required by the
137
         * algorithm (taking the minimum of the cost count to the left, up one,
138
         * and diagonally up and to the left of the current cost count being
139
         * calculated). (Note that the arrays aren't really copied anymore, just
140
         * switched...this is clearly much better than cloning an array or doing
141
         * a System.arraycopy() each time through the outer loop.)
142
         * 
143
         * Effectively, the difference between the two implementations is this
144
         * one does not cause an out of memory condition when calculating the LD
145
         * over two very large strings.
146
         */
147

    
148
        int n = s.length(); // length of s
149
        int m = t.length(); // length of t
150

    
151
        if (n == 0) {
152
            return m;
153
        } else if (m == 0) {
154
            return n;
155
        }
156

    
157
        int p[] = new int[n + 1]; // 'previous' cost array, horizontally
158
        int d[] = new int[n + 1]; // cost array, horizontally
159
        int _d[]; // placeholder to assist in swapping p and d
160

    
161
        // indexes into strings s and t
162
        int i; // iterates through s
163
        int j; // iterates through t
164

    
165
        char t_j; // jth character of t
166

    
167
        int cost; // cost
168

    
169
        for (i = 0; i <= n; i++) {
170
            p[i] = i;
171
        }
172

    
173
        for (j = 1; j <= m; j++) {
174
            t_j = t.charAt(j - 1);
175
            d[0] = j;
176

    
177
            for (i = 1; i <= n; i++) {
178
                cost = s.charAt(i - 1) == t_j ? 0 : 1;
179
                // minimum of cell to the left+1, to the top+1, diagonally left
180
                // and up +cost
181
                d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1]
182
                        + cost);
183
            }
184

    
185
            // copy current distance counts to 'previous row' distance counts
186
            _d = p;
187
            p = d;
188
            d = _d;
189
        }
190

    
191
        // our last action in the above loop was to switch d and p, so p now
192
        // actually has the most recent cost counts
193
        return p[n];
194
    }
195

    
196
}