svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.plugin / org.gvsig.xml2db / org.gvsig.xml2db.lib / org.gvsig.xml2db.lib.impl / src / main / java / org / gvsig / xml2db / lib / impl / Xml2dbCommons.java @ 47335
History | View | Annotate | Download (5.01 KB)
1 |
package org.gvsig.xml2db.lib.impl; |
---|---|
2 |
|
3 |
import java.io.BufferedInputStream; |
4 |
import java.io.BufferedReader; |
5 |
import java.io.File; |
6 |
import java.io.FileInputStream; |
7 |
import java.io.IOException; |
8 |
import java.io.InputStream; |
9 |
import java.io.InputStreamReader; |
10 |
import java.io.Reader; |
11 |
import java.nio.charset.Charset; |
12 |
import java.nio.charset.IllegalCharsetNameException; |
13 |
import java.nio.charset.UnsupportedCharsetException; |
14 |
import org.apache.commons.io.IOUtils; |
15 |
import org.apache.commons.io.input.BOMInputStream; |
16 |
import org.apache.commons.io.input.CloseShieldInputStream; |
17 |
import org.apache.commons.lang3.StringUtils; |
18 |
import org.apache.tika.config.TikaConfig; |
19 |
import org.apache.tika.detect.AutoDetectReader; |
20 |
import org.apache.tika.detect.EncodingDetector; |
21 |
import org.apache.tika.metadata.Metadata; |
22 |
import org.gvsig.tools.task.SimpleTaskStatus; |
23 |
import org.xml.sax.InputSource; |
24 |
|
25 |
/**
|
26 |
*
|
27 |
* @author jjdelcerro
|
28 |
*/
|
29 |
@SuppressWarnings("UseSpecificCatch") |
30 |
public class Xml2dbCommons { |
31 |
|
32 |
public static Charset detectCharset(InputStream is) { |
33 |
EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector(); |
34 |
BufferedInputStream bis = null; |
35 |
try {
|
36 |
bis = new BufferedInputStream(CloseShieldInputStream.wrap(is)); |
37 |
Charset charset = encodingDetector.detect(bis, new Metadata()); |
38 |
return charset;
|
39 |
} catch (Exception ex) { |
40 |
return null; |
41 |
} finally {
|
42 |
IOUtils.closeQuietly(bis); |
43 |
} |
44 |
} |
45 |
|
46 |
public static String detectCharsetName(InputStream is) { |
47 |
Charset charset = detectCharset(is);
|
48 |
if( charset==null ) { |
49 |
return null; |
50 |
} |
51 |
return charset.name();
|
52 |
} |
53 |
|
54 |
public static InputSource openReader(File xmlfile, Charset charset) { |
55 |
try {
|
56 |
FileInputStream fis = new FileInputStream(xmlfile); |
57 |
|
58 |
InputSource is = new InputSource();
|
59 |
is.setPublicId(xmlfile.getAbsolutePath()); |
60 |
is.setByteStream(fis); |
61 |
if( charset!=null ) { |
62 |
is.setEncoding(charset.name()); |
63 |
} |
64 |
return openReader(is);
|
65 |
} catch(Throwable t) { |
66 |
throw new RuntimeException("Can't open xml input stream.",t); |
67 |
} |
68 |
} |
69 |
|
70 |
public static InputSource openReader(InputStream xml, Charset charset) { |
71 |
InputSource is = new InputSource();
|
72 |
is.setByteStream(xml); |
73 |
if( charset!=null ) { |
74 |
is.setEncoding(charset.name()); |
75 |
} |
76 |
return openReader(is);
|
77 |
} |
78 |
|
79 |
public static InputSource openReader(InputSource is) { |
80 |
try {
|
81 |
if(StringUtils.isBlank(is.getEncoding())){
|
82 |
// EncodingDetector encodingDetector = TikaConfig.getDefaultConfig().getEncodingDetector();
|
83 |
// BufferedInputStream bis = new BufferedInputStream(is.getByteStream());
|
84 |
// Charset charset = encodingDetector.detect(bis, new Metadata());
|
85 |
// is.setEncoding(charset.name());
|
86 |
// is.setByteStream(bis);
|
87 |
AutoDetectReader reader = new AutoDetectReader(is.getByteStream());
|
88 |
is.setCharacterStream(reader); |
89 |
is.setEncoding(reader.getCharset().name()); |
90 |
} else {
|
91 |
BOMInputStream bomIs = new BOMInputStream(is.getByteStream());
|
92 |
is.setByteStream(bomIs); |
93 |
InputStreamReader reader = new InputStreamReader( |
94 |
is.getByteStream(), |
95 |
is.getEncoding() |
96 |
); |
97 |
is.setCharacterStream(reader); |
98 |
} |
99 |
return is;
|
100 |
} catch(Throwable t) { |
101 |
throw new RuntimeException("Can't open xml input stream.",t); |
102 |
} |
103 |
} |
104 |
|
105 |
public static long countLines(File xml, Charset charset, SimpleTaskStatus status) { |
106 |
try {
|
107 |
FileInputStream fis = new FileInputStream(xml); |
108 |
return countLines(fis, charset, status);
|
109 |
} catch(Throwable t) { |
110 |
throw new RuntimeException("Can't count lines.",t); |
111 |
} |
112 |
} |
113 |
|
114 |
public static long countLines(InputStream xml, Charset charset, SimpleTaskStatus status) { |
115 |
try {
|
116 |
long count = 0; |
117 |
// Reader reader = null;
|
118 |
BufferedReader br = null; |
119 |
status.setIndeterminate(); |
120 |
status.setCurValue(0);
|
121 |
try {
|
122 |
InputSource is = new InputSource(xml);
|
123 |
if( charset!=null ) { |
124 |
is.setEncoding(charset.name()); |
125 |
} |
126 |
is = openReader(is); |
127 |
br = new BufferedReader(is.getCharacterStream()); |
128 |
while( br.readLine()!=null ) { |
129 |
status.message("Calculating lines...");
|
130 |
status.incrementCurrentValue(); |
131 |
count++; |
132 |
} |
133 |
} finally {
|
134 |
IOUtils.closeQuietly(br); |
135 |
} |
136 |
return count;
|
137 |
} catch(Throwable t) { |
138 |
throw new RuntimeException("Can't count lines.",t); |
139 |
} |
140 |
} |
141 |
} |