gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / tests / test_codec.py @ 475
History | View | Annotate | Download (15 KB)
1 |
"""Testcases for cssutils.codec"""
|
---|---|
2 |
|
3 |
import codecs |
4 |
import unittest |
5 |
import sys |
6 |
|
7 |
# True when the running interpreter is Python 2.x.
PY2x = sys.version_info < (3,0)

# ``iostream`` is the in-memory stream class used by the stream-reader tests:
# a bytes stream on Python 3, the classic StringIO on Python 2.
if not PY2x:
    import io
    iostream = io.BytesIO
else:
    import StringIO
    iostream = StringIO.StringIO
14 |
|
15 |
from cssutils import codec |
16 |
|
17 |
# Probe once whether this interpreter ships a utf-32 codec; the utf-32
# specific checks in the tests are skipped when it does not.
haveutf32 = True
try:
    codecs.lookup("utf-32")
except LookupError:
    haveutf32 = False
|
23 |
|
24 |
|
25 |
class Queue(object):
    """
    FIFO byte buffer: ``write`` appends at the tail, ``read`` consumes
    from the head.
    """

    def __init__(self):
        # "".encode() is an empty byte string on Python 2 and Python 3 alike.
        self._buffer = "".encode()

    def write(self, chars):
        """Append *chars* to the end of the buffer.

        On Python 3 a text string is encoded with the default codec and a
        single int (what iterating over ``bytes`` yields) is turned into a
        one-byte string; anything else is appended unchanged.
        """
        if PY2x:
            data = chars
        elif isinstance(chars, str):
            data = chars.encode()
        elif isinstance(chars, int):
            data = bytes([chars])
        else:
            data = chars
        self._buffer += data

    def read(self, size=-1):
        """Remove and return up to *size* bytes from the front of the buffer.

        A negative *size* (the default) drains the whole buffer.
        """
        if size < 0:
            head, self._buffer = self._buffer, "".encode()
        else:
            head, self._buffer = self._buffer[:size], self._buffer[size:]
        return head
|
51 |
|
52 |
|
53 |
class CodecTestCase(unittest.TestCase):
    """Tests for the encoding detection helpers and the "css" codec."""

    def test_detectencoding_str(self):
        """codec.detectencoding_str()

        Each input is produced with ``.encode(...)``, so the bytes under test
        are the *encoded* form of the literal, not its raw code points.
        The expected value is a ``(encoding, flag)`` tuple; ``(None, False)``
        is what the detector returns when it cannot decide yet.
        """
        detect = codec.detectencoding_str
        expect = self.assertEqual

        # Inputs starting with high bytes / byte order marks.
        expect(detect(u''.encode()), (None, False))
        expect(detect(u'\xef'.encode('latin1')), (None, False))
        expect(detect(u'\xef\x33'.encode("utf-8")), ("utf-8", False))
        expect(detect(u'\xc3\xaf3'.encode("utf-8")), ("utf-8", False))
        expect(detect(u'\xef\xbb'.encode("latin1")), (None, False))
        expect(detect(u'\xef\xbb\x33'.encode("utf-8")), ("utf-8", False))
        expect(detect(u'\xef\xbb\xbf'.encode("utf-8-sig")), ("utf-8-sig", True))
        expect(detect(u'\xff'.encode("latin1")), (None, False))
        expect(detect(u'\xff\x33'.encode("utf-8")), ("utf-8", False))
        expect(detect(u'\xff\xfe'.encode("latin1")), (None, False))
        expect(detect(u'\xff\xfe\x33'.encode("utf-16")), ("utf-16", True))
        expect(detect(u'\xff\xfe\x00'.encode("latin1")), (None, False))
        expect(detect(u'\xff\xfe\x00\x33'.encode("utf-16")), ("utf-16", True))
        if haveutf32:
            expect(detect(u'\xff\xfe\x00\x00'.encode("utf-32")), ("utf-32", True))

        # Inputs starting with NUL bytes.
        expect(detect(u'\x00'.encode()), (None, False))
        expect(detect(u'\x00\x33'.encode()), ("utf-8", False))
        expect(detect(u'\x00\x00'.encode()), (None, False))
        expect(detect(u'\x00\x00\x33'.encode()), ("utf-8", False))
        expect(detect(u'\x00\x00\xfe'.encode('latin1')), (None, False))
        expect(detect(u'\x00\x00\x00\x33'.encode()), ("utf-8", False))
        if haveutf32:
            expect(detect(u'\x00\x00\x00@'.encode()), ("utf-32-be", False))
            expect(detect(u'\x00\x00\xfe\xff'.encode('utf-32')), ("utf-32", True))

        # Inputs starting with "@" followed by NUL bytes.
        expect(detect(u'@'.encode()), (None, False))
        expect(detect(u'@\x33'.encode()), ("utf-8", False))
        expect(detect(u'@\x00'.encode()), (None, False))
        expect(detect(u'@\x00\x33'.encode()), ("utf-8", False))
        expect(detect(u'@\x00\x00'.encode()), (None, False))
        expect(detect(u'@\x00\x00\x33'.encode()), ("utf-8", False))
        if haveutf32:
            expect(detect(u'@\x00\x00\x00'.encode()), ("utf-32-le", False))

        # Growing prefixes of an @charset rule: undecided until the closing
        # quote of the encoding name has been seen.
        expect(detect(u'@c'.encode()), (None, False))
        expect(detect(u'@ch'.encode()), (None, False))
        expect(detect(u'@cha'.encode()), (None, False))
        expect(detect(u'@char'.encode()), (None, False))
        expect(detect(u'@chars'.encode()), (None, False))
        expect(detect(u'@charse'.encode()), (None, False))
        expect(detect(u'@charset'.encode()), (None, False))
        expect(detect(u'@charset '.encode()), (None, False))
        expect(detect(u'@charset "'.encode()), (None, False))
        expect(detect(u'@charset "x'.encode()), (None, False))
        expect(detect(u'@charset ""'.encode()), ("", True))
        expect(detect(u'@charset "x"'.encode()), ("x", True))

        # Second positional argument: with True the same short inputs resolve
        # to ("utf-8", False) instead of staying undecided.
        # NOTE(review): presumably a "final"/no-more-input flag - confirm
        # against codec.detectencoding_str.
        expect(detect(u"@".encode(), False), (None, False))
        expect(detect(u"@".encode(), True), ("utf-8", False))
        expect(detect(u"@c".encode(), False), (None, False))
        expect(detect(u"@c".encode(), True), ("utf-8", False))

    def test_detectencoding_unicode(self):
        """codec.detectencoding_unicode()

        Unicode version of the detector (only parses the header).
        """
        detect = codec.detectencoding_unicode
        expect = self.assertEqual

        expect(detect(u'@charset "x'), (None, False))
        expect(detect(u'a {}'), ("utf-8", False))
        expect(detect(u'@charset "x', True), (None, False))
        expect(detect(u'@charset "x"'), ("x", True))

    def test_fixencoding(self):
        """codec._fixencoding()

        Checks when the encoding name inside an @charset rule is replaced,
        when the text is returned untouched and when None is returned.
        """
        fix = codec._fixencoding

        # Unterminated declaration: None unless the third argument is True.
        # ``assertTrue(... is None)`` is kept instead of ``assertIsNone`` so
        # the test does not depend on newer unittest versions.
        self.assertTrue(fix(u'@charset "', u"utf-8") is None)
        self.assertTrue(fix(u'@charset "x', u"utf-8") is None)

        unterminated = u'@charset "x'
        self.assertEqual(fix(unterminated, u"utf-8", True), unterminated)

        # Unquoted name: returned unchanged.
        unquoted = u'@charset x'
        self.assertEqual(fix(unquoted, u"utf-8"), unquoted)

        # Complete declaration: the quoted name is rewritten.
        complete = u'@charset "x"'
        self.assertEqual(fix(complete, u"utf-8"), complete.replace('"x"', '"utf-8"'))

    def test_decoder(self):
        """codecs.decoder

        Runs the stateless decoder, the incremental decoder (when available)
        and the stream reader of the "css" codec, each with autodetection and
        with an explicitly specified encoding.
        """
        def readstream(data, **kwargs):
            # Feed *data* item by item through a Queue-backed "css" stream
            # reader and return everything that was decoded.
            q = Queue()
            sr = codecs.getreader("css")(q, **kwargs)
            parts = []
            for c in data:
                q.write(c)
                parts.append(sr.read())
            return "".join(parts)

        def checkauto(encoding, text=u'@charset "x";g\xfcrk\u20ac{}'):
            # The decoded text names utf-8 in its @charset rule when the
            # bytes were encoded as utf-8-sig.
            outputencoding = encoding
            if outputencoding == "utf-8-sig":
                outputencoding = "utf-8"

            data = text.encode(encoding)
            expected = text.replace('"x"', '"%s"' % outputencoding)

            # Stateless decoder: autodetection, then specified encoding
            decode = codecs.getdecoder("css")
            self.assertEqual(decode(data)[0], expected)
            self.assertEqual(decode(data, encoding=encoding)[0], expected)

            # NOTE(review): this probes the cssutils ``codec`` module, not the
            # stdlib ``codecs`` module whose getincrementaldecoder() is called
            # below; if cssutils.codec does not expose that name this branch
            # never runs. Confirm which module was intended.
            if hasattr(codec, "getincrementaldecoder"):
                # Incremental decoder: autodetection, then specified encoding
                decoder = codecs.getincrementaldecoder("css")()
                self.assertEqual("".join(decoder.iterdecode(data)), expected)

                decoder = codecs.getincrementaldecoder("css")(encoding=encoding)
                self.assertEqual("".join(decoder.iterdecode(data)), expected)

            # Stream reader: autodetection, then specified encoding
            self.assertEqual(readstream(data), expected)
            self.assertEqual(readstream(data, encoding=encoding), expected)

        # Autodetectable encodings
        for name in ("utf-8-sig", "utf-16", "utf-16-le", "utf-16-be"):
            checkauto(name)
        if haveutf32:
            for name in ("utf-32", "utf-32-le", "utf-32-be"):
                checkauto(name)

        def checkdecl(encoding, template=u'@charset "%s";g\xfcrk{}'):
            # The @charset rule already names the real encoding, so decoding
            # must reproduce the text unchanged.
            text = template % encoding
            data = text.encode(encoding)

            # Stateless decoder: autodetection, then specified encoding
            decode = codecs.getdecoder("css")
            self.assertEqual(decode(data)[0], text)
            self.assertEqual(decode(data, encoding=encoding)[0], text)

            # NOTE(review): same ``codec`` vs ``codecs`` question as in
            # checkauto above.
            if hasattr(codec, "getincrementaldecoder"):
                # Incremental decoder: autodetection, then specified encoding
                decoder = codecs.getincrementaldecoder("css")()
                self.assertEqual("".join(decoder.iterdecode(data)), text)

                # Passed by keyword, matching checkauto; positionally the
                # stdlib base class would take this argument as ``errors``.
                decoder = codecs.getincrementaldecoder("css")(encoding=encoding)
                self.assertEqual("".join(decoder.iterdecode(data)), text)

            # Stream reader: autodetection, then specified encoding
            self.assertEqual(readstream(data), text)
            self.assertEqual(readstream(data, encoding=encoding), text)

        # Use correct declaration
        checkdecl("utf-8")
        checkdecl("iso-8859-1", u'@charset "%s";g\xfcrk')
        checkdecl("iso-8859-15")
        checkdecl("cp1252")

        # No recursion
        self.assertRaises(ValueError, u'@charset "css";div{}'.encode().decode, "css")

    def test_encoder(self):
        """codec.encoder

        Runs the stateless encoder, the incremental encoder (when available)
        and the stream writer of the "css" codec, each with the encoding taken
        from the @charset rule and with an explicitly specified encoding.
        """
        def writestream(text, **kwargs):
            # Push *text* character by character through a Queue-backed "css"
            # stream writer and return the bytes that were written.
            q = Queue()
            sw = codecs.getwriter("css")(q, **kwargs)
            for c in text:
                sw.write(c)
            return q.read()

        def check(encoding, text=u'@charset "x";g\xfcrk\u20ac{}'):
            # The encoded output names utf-8 in its @charset rule when
            # utf-8-sig was requested.
            outputencoding = encoding
            if outputencoding == "utf-8-sig":
                outputencoding = "utf-8"

            inputdecl = text.replace('"x"', '"%s"' % encoding)
            outputdecl = text.replace('"x"', '"%s"' % outputencoding)

            # Stateless encoder: encoding from the declaration, then specified
            encode = codecs.getencoder("css")
            self.assertEqual(encode(inputdecl)[0].decode(encoding), outputdecl)
            self.assertEqual(encode(text, encoding=encoding)[0].decode(encoding), outputdecl)

            # NOTE(review): this probes the cssutils ``codec`` module, not the
            # stdlib ``codecs`` module whose getincrementalencoder() is called
            # below; if cssutils.codec does not expose that name this branch
            # never runs. Confirm which module was intended.
            if hasattr(codec, "getincrementalencoder"):
                # Incremental encoder: declaration, then specified encoding
                encoder = codecs.getincrementalencoder("css")()
                self.assertEqual("".join(encoder.iterencode(inputdecl)).decode(encoding), outputdecl)

                encoder = codecs.getincrementalencoder("css")(encoding=encoding)
                self.assertEqual("".join(encoder.iterencode(text)).decode(encoding), outputdecl)

            # Stream writer: declaration, then specified encoding
            self.assertEqual(writestream(inputdecl).decode(encoding), outputdecl)
            self.assertEqual(writestream(text, encoding=encoding).decode(encoding), outputdecl)

        # Autodetectable encodings
        for name in ("utf-8-sig", "utf-16", "utf-16-le", "utf-16-be"):
            check(name)
        if haveutf32:
            for name in ("utf-32", "utf-32-le", "utf-32-be"):
                check(name)
        check("utf-8")
        check("iso-8859-1", u'@charset "x";g\xfcrk{}')
        check("iso-8859-15")
        check("cp1252")

        # No recursion
        self.assertRaises(ValueError, u'@charset "css";div{}'.encode, "css")

    def test_decode_force(self):
        """codec.decode (force)

        Checks how ``encoding`` and ``force`` interact with an @charset rule
        for the stateless, incremental and stream decoding entry points.
        """
        info = codecs.lookup("css")

        def decodeall(data, **kwargs):
            # info[1] is the stateless decode function of the lookup tuple
            # (Py 2.5 also offers it as info.decode).
            return info[1](data, **kwargs)[0]

        def incdecode(data, **kwargs):
            decoder = info.incrementaldecoder(**kwargs)
            return decoder.decode(data)

        def streamdecode(data, **kwargs):
            # iostream is the module-level in-memory stream class
            reader = info.streamreader(iostream(data), **kwargs)
            return reader.read()

        # All inputs are built with .encode('utf-8') so they are byte strings
        # on Python 2 and Python 3 alike; u'\xff' encodes to the two bytes
        # C3 BF.
        declared = u'@charset "utf-8"; \xff'.encode('utf-8')
        bare = u'\xff'.encode('utf-8')

        for decode in (decodeall, incdecode, streamdecode):
            # No options: the declared utf-8 is used
            self.assertEqual(decode(declared), u'@charset "utf-8"; \xff')

            # force=True: the given encoding wins; the rule is rewritten and
            # the two utf-8 bytes come out as two latin-1 characters
            self.assertEqual(
                decode(declared, encoding="iso-8859-1", force=True),
                u'@charset "iso-8859-1"; \xc3\xbf')

            # force=True without any declaration in the input
            self.assertEqual(
                decode(bare, encoding="iso-8859-1", force=True),
                u'\xc3\xbf')

            # force=False: the declaration in the input wins
            self.assertEqual(
                decode(declared, encoding="iso-8859-1", force=False),
                u'@charset "utf-8"; \xff')
338 |
|
339 |
|
340 |
if __name__ == '__main__':
    # ``unittest`` is already imported at the top of the module, so no local
    # re-import is needed; discover and run every test case in this file.
    unittest.main()