Statistics
| Revision:

svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.plugin / org.gvsig.xml2db / org.gvsig.xml2db.lib / org.gvsig.xml2db.lib.impl / src / main / java / org / gvsig / xml2db / lib / impl / Xml2dbCommons.java @ 47335

History | View | Annotate | Download (5.01 KB)

1
package org.gvsig.xml2db.lib.impl;
2

    
3
import java.io.BufferedInputStream;
4
import java.io.BufferedReader;
5
import java.io.File;
6
import java.io.FileInputStream;
7
import java.io.IOException;
8
import java.io.InputStream;
9
import java.io.InputStreamReader;
10
import java.io.Reader;
11
import java.nio.charset.Charset;
12
import java.nio.charset.IllegalCharsetNameException;
13
import java.nio.charset.UnsupportedCharsetException;
14
import org.apache.commons.io.IOUtils;
15
import org.apache.commons.io.input.BOMInputStream;
16
import org.apache.commons.io.input.CloseShieldInputStream;
17
import org.apache.commons.lang3.StringUtils;
18
import org.apache.tika.config.TikaConfig;
19
import org.apache.tika.detect.AutoDetectReader;
20
import org.apache.tika.detect.EncodingDetector;
21
import org.apache.tika.metadata.Metadata;
22
import org.gvsig.tools.task.SimpleTaskStatus;
23
import org.xml.sax.InputSource;
24

    
25
/**
26
 *
27
 * @author jjdelcerro
28
 */
29
@SuppressWarnings("UseSpecificCatch")
30
public class Xml2dbCommons {
31

    
32
    public static Charset detectCharset(InputStream is) {   
33
        EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector();
34
        BufferedInputStream bis = null;
35
        try {
36
            bis = new BufferedInputStream(CloseShieldInputStream.wrap(is));
37
            Charset charset = encodingDetector.detect(bis, new Metadata());
38
            return charset;
39
        } catch (Exception ex) {
40
            return null;
41
        } finally {
42
            IOUtils.closeQuietly(bis);
43
        }
44
    }
45
    
46
    public static String detectCharsetName(InputStream is) {        
47
        Charset charset = detectCharset(is);
48
        if( charset==null ) {
49
            return null;
50
        }
51
        return charset.name();
52
    }
53
    
54
    public static InputSource openReader(File xmlfile, Charset charset) {
55
        try {
56
            FileInputStream fis = new FileInputStream(xmlfile);
57

    
58
            InputSource is = new InputSource();
59
            is.setPublicId(xmlfile.getAbsolutePath());
60
            is.setByteStream(fis);
61
            if( charset!=null ) {
62
                is.setEncoding(charset.name());
63
            }            
64
            return openReader(is);
65
        } catch(Throwable t) {
66
            throw new RuntimeException("Can't open xml input stream.",t);
67
        }
68
    }
69
    
70
    public static InputSource openReader(InputStream xml, Charset charset) {
71
            InputSource is = new InputSource();
72
            is.setByteStream(xml);
73
            if( charset!=null ) {
74
                is.setEncoding(charset.name());
75
            }            
76
            return openReader(is);
77
    }
78
    
79
    public static InputSource openReader(InputSource is) {
80
        try {            
81
            if(StringUtils.isBlank(is.getEncoding())){
82
//                EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector();
83
//                BufferedInputStream bis = new BufferedInputStream(is.getByteStream());
84
//                Charset charset = encodingDetector.detect(bis, new Metadata());
85
//                is.setEncoding(charset.name());
86
//                is.setByteStream(bis);
87
                AutoDetectReader reader = new AutoDetectReader(is.getByteStream());
88
                is.setCharacterStream(reader);
89
                is.setEncoding(reader.getCharset().name());
90
            } else {
91
                BOMInputStream bomIs = new BOMInputStream(is.getByteStream());
92
                is.setByteStream(bomIs);
93
                InputStreamReader reader = new InputStreamReader(
94
                        is.getByteStream(),
95
                        is.getEncoding()
96
                );
97
                is.setCharacterStream(reader);
98
            }
99
            return is;
100
        } catch(Throwable t) {
101
            throw new RuntimeException("Can't open xml input stream.",t);
102
        }
103
    }
104

    
105
    public static long countLines(File xml, Charset charset, SimpleTaskStatus status) {
106
        try {
107
            FileInputStream fis = new FileInputStream(xml);
108
            return countLines(fis, charset, status);
109
        } catch(Throwable t) {
110
            throw new RuntimeException("Can't count lines.",t);
111
        }
112
    }
113

    
114
    public static long countLines(InputStream xml, Charset charset, SimpleTaskStatus status) {
115
        try {
116
            long count = 0;
117
//            Reader reader = null;
118
            BufferedReader br = null;
119
            status.setIndeterminate();
120
            status.setCurValue(0);
121
            try {
122
                InputSource is = new InputSource(xml);
123
                if( charset!=null ) {
124
                    is.setEncoding(charset.name());
125
                }
126
                is = openReader(is);
127
                br = new BufferedReader(is.getCharacterStream());
128
                while( br.readLine()!=null ) {
129
                    status.message("Calculating lines...");
130
                    status.incrementCurrentValue();
131
                    count++;
132
                }
133
            } finally {
134
                IOUtils.closeQuietly(br);
135
            }
136
            return count;
137
        } catch(Throwable t) {
138
            throw new RuntimeException("Can't count lines.",t);
139
        }
140
    }    
141
}