Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.compat.cdc / org.gvsig.fmap.dal / org.gvsig.fmap.dal.file / org.gvsig.fmap.dal.file.csv / src / main / java / org / gvsig / fmap / dal / store / csv / simplereaders / CSVReaderSuperCSV.java @ 47657

History | View | Annotate | Download (11.5 KB)

1
package org.gvsig.fmap.dal.store.csv.simplereaders;
2

    
3
import java.io.BufferedReader;
4
import java.io.File;
5
import java.io.FileInputStream;
6
import java.io.FileNotFoundException;
7
import java.io.FileReader;
8
import java.io.IOException;
9
import java.io.InputStreamReader;
10
import java.io.Reader;
11
import java.io.UnsupportedEncodingException;
12
import java.util.List;
13
import java.util.function.Function;
14
import org.apache.commons.io.FilenameUtils;
15
import org.apache.commons.io.IOUtils;
16
import org.apache.commons.io.input.CloseShieldReader;
17
import org.apache.commons.lang3.StringUtils;
18
import org.apache.commons.text.StringEscapeUtils;
19
import org.gvsig.fmap.dal.store.csv.CSVStoreParameters;
20
import org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileIndex;
21
import org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileReader;
22
import static org.gvsig.fmap.dal.store.simplereader.virtualrows.RandomAccessFileReader.FILTER_NONE;
23
import org.gvsig.fmap.dal.store.csv.virtualrows.SuperCSVList;
24
import org.gvsig.fmap.dal.store.simplereader.simplereaders.AbstractSimpleReader;
25
import org.gvsig.tools.dynobject.DynObject;
26
import org.gvsig.tools.task.SimpleTaskStatus;
27
import org.gvsig.tools.util.GetItemWithSize64;
28
import org.slf4j.Logger;
29
import org.slf4j.LoggerFactory;
30
import org.supercsv.comment.CommentStartsWith;
31
import org.supercsv.io.CsvListReader;
32
import org.supercsv.prefs.CsvPreference;
33
import org.supercsv.quote.QuoteMode;
34

    
35
public class CSVReaderSuperCSV extends AbstractSimpleReader {
36

    
37
    //
38
    // http://supercsv.sourceforge.net/examples_reading.html
39
    // http://supercsv.sourceforge.net/apidocs/index.html
40
    //
41
    private static final Logger LOGGER = LoggerFactory.getLogger(CSVReaderSuperCSV.class);
42

    
43
    private CsvListReader reader;
44
    private final CSVStoreParameters parameters;
45
    private List<String>  nextLine;
46
    private int columns;
47

    
48
    public CSVReaderSuperCSV(Reader in, CSVStoreParameters parameters) {
49
        this(parameters);
50
        this.reader = new CsvListReader(in, getCSVPreferences());
51
    }    
52
    
53
    /**
     * Creates a reader bound to the given parameters but without an underlying
     * CSV parser; no row is buffered and the column count is not yet known.
     *
     * @param parameters store parameters describing the CSV source
     */
    public CSVReaderSuperCSV(CSVStoreParameters parameters) {
        this.parameters = parameters;
        this.reader = null;
        this.nextLine = null;
        this.columns = 0;
    }
60

    
61
    public CSVStoreParameters getParameters() {
62
        return this.parameters;
63
    }
64

    
65
    @Override
66
    public String[] getHeader() throws IOException {
67
        return this.reader.getHeader(true);
68
    }
69
    
70
    @Override
71
    public int getColumnsCount() throws IOException {
72
        if( this.columns <= 0 ) {
73
            this.columns = reader.length();
74
            if( this.columns <= 0 ) {
75
                this.nextLine = this.reader.read();
76
                this.columns = reader.length();
77
            }
78
        }
79
        return this.columns;
80
    }
81
    
82
    private boolean hasMultilineRecords(SimpleTaskStatus status) throws FileNotFoundException, UnsupportedEncodingException, IOException {
83
        FileInputStream fis = null;
84
        InputStreamReader theReader = null;
85
        BufferedReader breader = null;
86
        try {
87
            CSVStoreParameters params = getParameters();
88
            File data_file = CSVStoreParameters.getFile(params);
89
            String charset = CSVStoreParameters.getCharset(params);
90
            fis = new FileInputStream(data_file);
91
            theReader = new InputStreamReader(fis, charset);
92
            breader = new BufferedReader(theReader);
93
            CsvListReader parser = new CsvListReader(breader, getCSVPreferences());
94
//        int firstRecordLine = 0;
95
            while (parser.read() != null) {
96
                if (parser.getLineNumber() != parser.getRowNumber()) {
97
                    return true;
98
                }
99
            }
100
            return false;
101
        } finally {
102
            IOUtils.closeQuietly(breader);
103
            IOUtils.closeQuietly(theReader);
104
            IOUtils.closeQuietly(fis);
105
        }
106
    }
107

    
108
    @Override
109
    public GetItemWithSize64<List<String>>  getVirtualRows(SimpleTaskStatus status) {
110
        RandomAccessFileReader theReader = null;
111
        RandomAccessFileIndex theIndex = null;
112
        try {
113
            CSVStoreParameters params = getParameters();
114
            File data_file = CSVStoreParameters.getFile(params);
115
            if( data_file.length()< 10*1024*1024 ) {
116
                return null;
117
            }
118
            String charset = CSVStoreParameters.getCharset(params);
119
            File index_file = getIndexFile(data_file);
120
            
121
            theReader = new RandomAccessFileReader(data_file, charset);
122

    
123
            Function<BufferedReader, Integer> numberOfLinesInRecord = null;
124

    
125
            if (theReader.isRecomemendedTheRecreationOfTheLinesIndex(index_file)) {
126
                if (this.hasMultilineRecords(status)) {
127
                    numberOfLinesInRecord = new Function<BufferedReader, Integer>() {
128
                        @Override
129
                        public Integer apply(BufferedReader breader) {
130
                            CloseShieldReader theReader = CloseShieldReader.wrap(breader);
131
                            CsvListReader parser = new CsvListReader(theReader, getCSVPreferences());
132
                            try {
133
                                List<String> values = parser.read();
134
                            } catch (IOException ex) {
135
                                return 1;
136
                            }
137
                            return parser.getLineNumber();
138
                        }
139
                    };
140
                }
141
                theIndex = theReader.createOrOpenIndexOfLines(index_file, false, FILTER_NONE, status, numberOfLinesInRecord);
142
            } else {
143
                theIndex = new RandomAccessFileIndex(index_file);
144
            }
145

    
146
            SuperCSVList list = new SuperCSVList(
147
                    theReader, 
148
                    theIndex, 
149
                    CSVStoreParameters.isFirstLineHeader(getParameters())?1:0
150
            );
151
            
152
            list.setPreferences(this.getCSVPreferences());
153
            return list;
154
        } catch (IOException ex) {
155
            return null;
156
        } finally {
157
            // We do not close the index or the reader because we need it to remain open
158
//            IOUtils.closeQuietly(theReader);
159
//            IOUtils.closeQuietly(theIndex);
160
        }
161
    }
162
    
163
    @Override
164
    public List<String> read() throws IOException {
165
        List<String> line;
166
        if( this.nextLine != null ) {
167
            line = this.nextLine;
168
            this.nextLine = null;
169
        } else {
170
            line = this.reader.read();
171
        }
172
        if( line!=null ) {
173
            for (int i = 0; i < line.size(); i++) {
174
                String s = line.get(i);
175
                if( s!=null ) {
176
                    line.set(i, unescapeCRLF(s));
177
                }
178
            }
179
        }
180
        return line;
181
    }
182

    
183
    @Override
184
    public void close() throws IOException {
185
        this.reader.close();
186
    }
187

    
188
    @Override
189
    public List<String> skip(int lines) throws IOException {
190
        if( lines <= 0 ) {
191
            return null;
192
        }
193
        if( this.nextLine != null ) {
194
            this.nextLine = null;
195
            lines--;
196
        }
197
        List<String> row = null;
198
        for ( int i = 0; i < lines; i++ ) {
199
            row = reader.read();
200
        }
201
        return row;
202
    }
203

    
204
    public final CsvPreference getCSVPreferences() {
205
        try {
206
            String s;
207
            char quoteChar;
208
            int delimiterChar;
209
            String endOfLineSymbols;
210

    
211
            DynObject params = this.getParameters();
212

    
213
            CsvPreference.Builder builder;
214

    
215
            CsvPreference defaultPreference = CSVStoreParameters
216
                    .getPredefinedCSVPreferences(params);
217
            if ( defaultPreference == null ) {
218
                defaultPreference = CsvPreference.STANDARD_PREFERENCE;
219
            }
220

    
221
            endOfLineSymbols = CSVStoreParameters.getRecordSeparator(params);
222
            if ( StringUtils.isBlank(endOfLineSymbols) ) {
223
                endOfLineSymbols = defaultPreference.getEndOfLineSymbols();
224
            }
225
            s = CSVStoreParameters.getQuoteCharacter(params);
226
            if ( StringUtils.isBlank(s) ) {
227
                quoteChar = (char) defaultPreference.getQuoteChar();
228
            } else {
229
                quoteChar = s.charAt(0);
230
            }
231
            s = CSVStoreParameters.getDelimiter(params);
232
            if ( StringUtils.isBlank(s) ) {
233
                delimiterChar = defaultPreference.getDelimiterChar();
234
            } else {
235
                delimiterChar = s.charAt(0);
236
            }
237

    
238
            builder = new CsvPreference.Builder(quoteChar, delimiterChar,
239
                    endOfLineSymbols);
240

    
241
            s = CSVStoreParameters.getCommentStartMarker(params);
242
            if ( !StringUtils.isBlank(s) ) {
243
                CommentStartsWith cs = new CommentStartsWith(s);
244
                builder.skipComments(cs);
245
            }
246

    
247
            builder.surroundingSpacesNeedQuotes(CSVStoreParameters
248
                    .getSurroundingSpacesNeedQuotes(params));
249
            QuoteMode quoteMode = CSVStoreParameters.getQuoteMode(params);
250
            if ( quoteMode != null ) {
251
                builder.useQuoteMode(quoteMode);
252
            }
253
            return builder.build();
254
        } catch (Exception e) {
255
            LOGGER.warn("Can't make preferences for CSV '" + getFullFileName()
256
                    + "'.", e);
257
            return null;
258
        }
259
    }
260
    
261
    private String getFullFileName() {
262
        // Usar solo para mostrar mensajes en el logger.
263
        String s;
264
        try {
265
            s = getParameters().getFile().getAbsolutePath();
266
        } catch (Exception e2) {
267
            s = "(unknow)";
268
        }
269
        return s;        
270
    }
271

    
272
    @Override
273
    public int getLine() {
274
        if( this.reader==null ) {
275
            return 0;
276
        }
277
        return this.reader.getLineNumber();
278
    }
279

    
280
    @Override
281
    public List<String> nextRowValues() {
282
        try {
283
            return this.read();
284
        } catch (IOException ex) {
285
            throw new RuntimeException(ex);
286
        }
287
    }
288

    
289
    public static String escapeCRLF(String s) {
290
        if( s==null ) {
291
            return s;
292
        }
293
        String s1 = s;
294
        s1 = StringUtils.replace(s1, "\\", "\\\\");
295
        s1 = StringUtils.replace(s1, "\n", "\\n");
296
        s1 = StringUtils.replace(s1, "\r", "\\r");
297
        return s1;
298
    }
299
    
300
    public static String unescapeCRLF(String s) {
301
        if( s==null || s.indexOf('\\')==-1 ) {
302
            return s;
303
        }
304
        String s1 = s;
305
        s1 = s.replaceAll("(?:^\\\\n)|(?:([^\\\\])\\\\n)","$1\n");
306
        s1 = s1.replaceAll("(?:^\\\\r)|(?:([^\\\\])\\\\n)","$1\r");
307
        s1 = StringUtils.replace(s1, "\\\\", "\\");
308
        return s1;
309
    }
310
    
311
    
312
    public static void main(String[] args) {
313
        String s0 = "\\n{\\n   \"ANGULO\":\"0.000\",\\n    \"\tEXTO\":\"RAVAL ROIG\\\\r\\\\n - \\\\r\\\\nVIRGEN DEL SOCORRO\",\\n    \"LINK_POLIGONO\":\"HVCSGISCODE_ENT_11230100000000001\",\\n    \"GEOMETRY\":\"00000000014125fe9b57b4a23441503411cb1c432d\"\\n}";
314
        System.out.println("#"+s0+"#");
315
        String s1 = s0.replaceAll("(?:^\\\\n)|(?:([^\\\\])\\\\n)","$1\n");
316
        System.out.println("#"+s1+"#");
317
        String s2 = s1.replaceAll("([^\\\\])\\\\r","$1{r}");
318
        System.out.println("#"+s2+"#");
319
        String s3 = StringEscapeUtils.unescapeCsv(s0);
320
        System.out.println("#"+s3+"#");
321
    }
322
    
323
    
324
    public static File getIndexFile(File data_file) {
325
        if (data_file == null){
326
            return null;
327
        }
328
        File index_file = new File(FilenameUtils.removeExtension(data_file.getAbsolutePath()) + ".idx");
329
        return index_file;
330
    }
331

    
332
}