svn-gvsig-desktop / trunk / org.gvsig.desktop / org.gvsig.desktop.library / org.gvsig.utils / src / main / java / org / gvsig / utils / xml / XMLEncodingUtils.java @ 40561
History | View | Annotate | Download (6.3 KB)
1 |
/**
|
---|---|
2 |
* gvSIG. Desktop Geographic Information System.
|
3 |
*
|
4 |
* Copyright (C) 2007-2013 gvSIG Association.
|
5 |
*
|
6 |
* This program is free software; you can redistribute it and/or
|
7 |
* modify it under the terms of the GNU General Public License
|
8 |
* as published by the Free Software Foundation; either version 3
|
9 |
* of the License, or (at your option) any later version.
|
10 |
*
|
11 |
* This program is distributed in the hope that it will be useful,
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14 |
* GNU General Public License for more details.
|
15 |
*
|
16 |
* You should have received a copy of the GNU General Public License
|
17 |
* along with this program; if not, write to the Free Software
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
19 |
* MA 02110-1301, USA.
|
20 |
*
|
21 |
* For any additional information, do not hesitate to contact us
|
22 |
* at info AT gvsig.com, or visit our website www.gvsig.com.
|
23 |
*/
|
24 |
package org.gvsig.utils.xml; |
25 |
|
26 |
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PushbackInputStream;
import java.io.UnsupportedEncodingException;
34 |
|
35 |
/**
 * A set of methods to detect XML encoding. The class is able to autodetect
 * certain encodings from the first four bytes of the stream (byte order mark
 * or the byte pattern of a leading <code>&lt;</code> / <code>&lt;?</code>),
 * and it reads the XML declaration for the rest of encodings.
 * <p>
 * Detection reads from the wrapped stream and never rewinds it. Instances
 * keep mutable detection state and perform no synchronization.
 *
 * @author C&eacute;sar Mart&iacute;nez Izquierdo &lt;cesar.martinez@iver.es&gt;
 */
public class XMLEncodingUtils {
    /**
     * Maximum number of characters scanned while looking for the
     * <code>&gt;</code> that ends the XML declaration. Guards against
     * buffering an arbitrarily large malformed file.
     */
    private static final int MAX_DECLARATION_LENGTH = 4096;

    /** Shared empty array meaning "no document bytes need to be restored". */
    private static final byte[] NO_BYTES = new byte[0];

    InputStream _is;

    /**
     * Document bytes (neither BOM nor XML declaration) swallowed by the last
     * {@link #getEncoding()} call. {@link #getReader()} pushes them back so
     * the returned reader does not lose the beginning of the document.
     */
    private byte[] consumedContent = NO_BYTES;

    /**
     * Creates a new XMLEncodingUtils object.
     *
     * @param is An InputStream connected to the XML file to process.
     * @throws IllegalArgumentException if <code>is</code> is null.
     */
    public XMLEncodingUtils(InputStream is) {
        if (is == null) {
            throw new IllegalArgumentException("is must not be null");
        }
        _is = is;
    }

    /**
     * Gets the encoding of the XML file.
     * <p>
     * The following encodings can be detected: UTF-32BE, UTF-32LE,
     * UTF-16BE, UTF-16LE, UTF-8 (the latter only through its byte order
     * mark). The rest of the encodings are read from the XML declaration.
     * <p>
     * This method consumes bytes from the stream: at least the first four,
     * and the whole XML declaration when the stream starts with
     * <code>&lt;?xm</code>.
     *
     * @return Returns the encoding of the XML file, or null if the
     * encoding couldn't be correctly detected or read from the XML
     * header (including streams shorter than four bytes, declarations
     * without a well-formed <code>encoding</code> value, and I/O errors).
     */
    public String getEncoding() {
        consumedContent = NO_BYTES;
        byte[] head = new byte[4];
        int headLength = 0;
        int chk = 0; // the first four bytes packed big-endian into one int

        try {
            while (headLength < 4) {
                int b = _is.read();
                if (b == -1) {
                    break;
                }
                chk = (chk << 8) | b;
                head[headLength++] = (byte) b;
            }
            if (headLength < 4) {
                return null;
            }

            switch (chk) {
                case 0x0000FEFF: // UTF-32 big-endian BOM
                    return "UTF-32BE";

                case 0xFFFE0000: // UTF-32 little-endian BOM
                    return "UTF-32LE";

                case 0x0000003C: // '<' as UTF-32BE, no BOM
                    consumedContent = head;
                    return "UTF-32BE";

                case 0x3C000000: // '<' as UTF-32LE, no BOM
                    consumedContent = head;
                    return "UTF-32LE";

                case 0x003C003F: // "<?" as UTF-16BE, no BOM
                    consumedContent = head;
                    return "UTF-16BE";

                case 0x3C003F00: // "<?" as UTF-16LE, no BOM
                    consumedContent = head;
                    return "UTF-16LE";

                case 0x3C3F786D: // "<?xm" in an ASCII-compatible encoding
                    return readDeclaredEncoding();

                default:
                    break;
            }

            if ((chk & 0xFFFF0000) == 0xFEFF0000) {
                // UTF-16BE BOM: the two trailing bytes are document content
                consumedContent = new byte[] { head[2], head[3] };
                return "UTF-16BE";
            }
            if ((chk & 0xFFFF0000) == 0xFFFE0000) {
                // UTF-16LE BOM: the two trailing bytes are document content
                consumedContent = new byte[] { head[2], head[3] };
                return "UTF-16LE";
            }
            if ((chk & 0xFFFFFF00) == 0xEFBBBF00) {
                // UTF-8 BOM: the last byte is document content
                consumedContent = new byte[] { head[3] };
                return "UTF-8";
            }
            return null;
        } catch (IOException ex) {
            return null;
        }
    }

    /**
     * Reads the remainder of an XML declaration whose first four bytes
     * (<code>&lt;?xm</code>) were already consumed, up to and including the
     * first <code>&gt;</code>, and extracts the declared encoding.
     *
     * @return the declared encoding, or null if the stream ends or the scan
     * limit is reached before <code>&gt;</code>, or if no well-formed
     * <code>encoding</code> value is present.
     * @throws IOException if reading from the stream fails.
     */
    private String readDeclaredEncoding() throws IOException {
        StringBuilder declaration = new StringBuilder("<?xm");
        while (declaration.length() < MAX_DECLARATION_LENGTH) {
            int b = _is.read();
            if (b == -1) {
                return null;
            }
            declaration.append((char) b);
            if (b == '>') {
                return parseEncodingAttribute(declaration.toString());
            }
        }
        return null;
    }

    /**
     * Extracts the quoted value that follows the first occurrence of the
     * word <code>encoding</code> in the given XML declaration text.
     *
     * @return the text between the quotes, or null if the word, the opening
     * quote or the matching closing quote is missing.
     */
    private static String parseEncodingAttribute(String declaration) {
        int pos = declaration.indexOf("encoding");
        if (pos == -1) {
            return null;
        }
        int length = declaration.length();
        while (pos < length
                && declaration.charAt(pos) != '"'
                && declaration.charAt(pos) != '\'') {
            pos++;
        }
        if (pos == length) {
            return null; // no opening quote
        }
        char delimiter = declaration.charAt(pos);
        int start = pos + 1;
        int end = declaration.indexOf(delimiter, start);
        if (end == -1) {
            return null; // no closing quote
        }
        return declaration.substring(start, end);
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected).
     * <p>
     * A byte order mark is skipped. When the encoding was read from the XML
     * declaration, the reader starts right after the <code>&gt;</code> that
     * ends the declaration. In every other case the document bytes consumed
     * during detection are pushed back, so the reader starts at the first
     * character of the document.
     *
     * @return A reader which uses the right encoding, or null
     * if the encoding couldn't be correctly detected or read
     * from the XML header, or is not supported by this JVM.
     */
    public InputStreamReader getReader() {
        String encoding = getEncoding();
        if (encoding == null) {
            return null;
        }
        try {
            InputStream source = _is;
            if (consumedContent.length > 0) {
                PushbackInputStream restored =
                        new PushbackInputStream(_is, consumedContent.length);
                restored.unread(consumedContent);
                source = restored;
            }
            return new InputStreamReader(source, encoding);
        } catch (UnsupportedEncodingException e) {
            return null;
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected).
     *
     * @param is An InputStream connected to the XML file to process
     * @return A reader for the provided XML file, or null if the encoding
     * could not be determined.
     * @throws IllegalArgumentException if <code>is</code> is null.
     * @see #getReader()
     */
    public static InputStreamReader getReader(InputStream is) {
        XMLEncodingUtils util = new XMLEncodingUtils(is);
        return util.getReader();
    }

    /**
     * Gets the character encoding of the XML file. The stream is read from
     * but neither rewound nor closed.
     *
     * @param is An InputStream connected to the XML file to process
     * @return The encoding of the file, or null if it could not be
     * determined.
     * @throws IllegalArgumentException if <code>is</code> is null.
     * @see #getEncoding()
     */
    public static String getEncoding(InputStream is) {
        XMLEncodingUtils util = new XMLEncodingUtils(is);
        return util.getEncoding();
    }

    /**
     * Gets an InputStreamReader for the provided XML file.
     * The reader uses the right encoding, as specified in
     * the XML header (or autodetected). Closing the returned reader closes
     * the underlying file; if no reader can be created the file is closed
     * before returning.
     *
     * @param file The XML file to process
     * @return A reader for the provided XML file, or null if the encoding
     * could not be determined.
     * @throws FileNotFoundException if the file cannot be opened for reading.
     * @see #getReader()
     */
    public static InputStreamReader getReader(File file) throws FileNotFoundException {
        BufferedInputStream bs = new BufferedInputStream(new FileInputStream(file));
        InputStreamReader reader = null;
        try {
            reader = new XMLEncodingUtils(bs).getReader();
            return reader;
        } finally {
            if (reader == null) {
                // nobody else will ever own the stream, so release it here
                closeQuietly(bs);
            }
        }
    }

    /**
     * Gets the character encoding of the XML file. The file is opened,
     * inspected and closed again.
     *
     * @param file The XML file to process
     * @return The encoding of the file, or null if it could not be
     * determined.
     * @throws FileNotFoundException if the file cannot be opened for reading.
     * @see #getEncoding()
     */
    public static String getEncoding(File file) throws FileNotFoundException {
        BufferedInputStream bs = new BufferedInputStream(new FileInputStream(file));
        try {
            return new XMLEncodingUtils(bs).getEncoding();
        } finally {
            closeQuietly(bs);
        }
    }

    /** Closes the stream, ignoring any failure reported while closing. */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // the stream was only read from; a failed close loses no data
        }
    }
}