Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / tests / test_codec.py @ 475

History | View | Annotate | Download (15 KB)

1
"""Testcases for cssutils.codec"""
2

    
3
import codecs
4
import unittest
5
import sys
6

    
7
# True on any Python 2.x interpreter, False on Python 3 and later.
PY2x = sys.version_info[0] < 3

# ``iostream`` is the in-memory stream class the tests wrap encoded input in:
# a byte stream on Python 3, the classic StringIO class on Python 2.
if not PY2x:
    import io
    iostream = io.BytesIO
else:
    import StringIO
    iostream = StringIO.StringIO
14

    
15
from cssutils import codec
16

    
17
# Whether this interpreter ships a "utf-32" codec; the UTF-32 test cases are
# only run when it does.
haveutf32 = True
try:
    codecs.lookup("utf-32")
except LookupError:
    haveutf32 = False
23

    
24

    
25
class Queue(object):
    """
    queue: write bytes at one end, read bytes from the other end

    The tests use instances as the underlying stream of codec stream
    readers and writers, so data can be fed in small pieces and drained
    again.
    """

    def __init__(self):
        # Bytes that have been written but not yet read.
        self._buffer = b""

    def write(self, chars):
        """
        Append ``chars`` to the end of the queue.

        ``chars`` may be a byte string, a text string (converted with its
        argument-less ``encode()``) or a single integer byte value, which is
        what iterating over a bytes object yields on Python 3.
        """
        if isinstance(chars, type(u"")):
            chars = chars.encode()
        elif isinstance(chars, int):
            # bytearray gives the same one-byte result on Python 2 and 3.
            chars = bytes(bytearray((chars,)))
        self._buffer += chars

    def read(self, size=-1):
        """
        Remove and return up to ``size`` bytes from the front of the queue.

        A negative ``size`` or ``None`` drains everything that is buffered;
        an empty byte string is returned when nothing is buffered.
        """
        if size is None or size < 0:
            size = len(self._buffer)
        data, self._buffer = self._buffer[:size], self._buffer[size:]
        return data
51

    
52

    
53
class CodecTestCase(unittest.TestCase):
    """Test cases for the functions in cssutils.codec and the "css" codec."""

    def test_detectencoding_str(self):
        """codec.detectencoding_str()

        Passes encoded byte strings (optionally with a second positional
        argument) to the detector and compares the returned 2-tuple.
        """
        detect = codec.detectencoding_str
        equal = self.assertEqual
        undecided = (None, False)
        utf8 = ("utf-8", False)

        # Text starting with U+00EF / U+00BB / U+00BF.
        equal(detect(u''.encode()), undecided)
        equal(detect(u'\xef'.encode('latin1')), undecided)
        equal(detect(u'\xef\x33'.encode("utf-8")), utf8)
        equal(detect(u'\xc3\xaf3'.encode("utf-8")), utf8)
        equal(detect(u'\xef\xbb'.encode("latin1")), undecided)
        equal(detect(u'\xef\xbb\x33'.encode("utf-8")), utf8)
        equal(detect(u'\xef\xbb\xbf'.encode("utf-8-sig")), ("utf-8-sig", True))

        # Text starting with U+00FF / U+00FE.
        equal(detect(u'\xff'.encode("latin1")), undecided)
        equal(detect(u'\xff\x33'.encode("utf-8")), utf8)
        equal(detect(u'\xff\xfe'.encode("latin1")), undecided)
        equal(detect(u'\xff\xfe\x33'.encode("utf-16")), ("utf-16", True))
        equal(detect(u'\xff\xfe\x00'.encode("latin1")), undecided)
        equal(detect(u'\xff\xfe\x00\x33'.encode("utf-16")), ("utf-16", True))
        if haveutf32:
            equal(detect(u'\xff\xfe\x00\x00'.encode("utf-32")), ("utf-32", True))

        # Text starting with NUL characters.
        equal(detect(u'\x00'.encode()), undecided)
        equal(detect(u'\x00\x33'.encode()), utf8)
        equal(detect(u'\x00\x00'.encode()), undecided)
        equal(detect(u'\x00\x00\x33'.encode()), utf8)
        equal(detect(u'\x00\x00\xfe'.encode('latin1')), undecided)
        equal(detect(u'\x00\x00\x00\x33'.encode()), utf8)
        if haveutf32:
            equal(detect(u'\x00\x00\x00@'.encode()), ("utf-32-be", False))
            equal(detect(u'\x00\x00\xfe\xff'.encode('utf-32')), ("utf-32", True))

        # Text starting with "@" followed by NUL characters.
        equal(detect(u'@'.encode()), undecided)
        equal(detect(u'@\x33'.encode()), utf8)
        equal(detect(u'@\x00'.encode()), undecided)
        equal(detect(u'@\x00\x33'.encode()), utf8)
        equal(detect(u'@\x00\x00'.encode()), undecided)
        equal(detect(u'@\x00\x00\x33'.encode()), utf8)
        if haveutf32:
            equal(detect(u'@\x00\x00\x00'.encode()), ("utf-32-le", False))

        # Every prefix of an unterminated @charset rule, from '@c' up to
        # '@charset "x', is expected to give (None, False).
        header = u'@charset "x'
        for end in range(2, len(header) + 1):
            equal(detect(header[:end].encode()), undecided)
        equal(detect(u'@charset ""'.encode()), ("", True))
        equal(detect(u'@charset "x"'.encode()), ("x", True))

        # Same short inputs with the optional second argument given.
        equal(detect(u"@".encode(), False), undecided)
        equal(detect(u"@".encode(), True), utf8)
        equal(detect(u"@c".encode(), False), undecided)
        equal(detect(u"@c".encode(), True), utf8)

    def test_detectencoding_unicode(self):
        """codec.detectencoding_unicode()"""
        detect = codec.detectencoding_unicode
        # Unicode version (only parses the header)
        self.assertEqual(detect(u'@charset "x'), (None, False))
        self.assertEqual(detect(u'a {}'), ("utf-8", False))
        self.assertEqual(detect(u'@charset "x', True), (None, False))
        self.assertEqual(detect(u'@charset "x"'), ("x", True))

    def test_fixencoding(self):
        """codec._fixencoding()"""
        fix = codec._fixencoding

        # Unterminated declarations without the third argument give None.
        for text in (u'@charset "', u'@charset "x'):
            self.assertTrue(fix(text, u"utf-8") is None)

        # With the third argument set, the same text comes back unchanged.
        text = u'@charset "x'
        self.assertEqual(fix(text, u"utf-8", True), text)

        # An unquoted name is left alone.
        text = u'@charset x'
        self.assertEqual(fix(text, u"utf-8"), text)

        # A complete declaration gets its name replaced.
        text = u'@charset "x"'
        self.assertEqual(fix(text, u"utf-8"), text.replace('"x"', '"utf-8"'))

    def test_decoder(self):
        """codecs.decoder

        Decodes the same encoded style sheet through the stateless decoder
        and the stream reader of the "css" codec, each with and without an
        explicitly passed encoding.
        """
        def streamread(data, **readerargs):
            # Feed ``data`` item by item into a Queue wrapped by a "css"
            # stream reader and join whatever the reader returns each time.
            q = Queue()
            reader = codecs.getreader("css")(q, **readerargs)
            pieces = []
            for item in data:
                q.write(item)
                pieces.append(reader.read())
            return "".join(pieces)

        def checkall(data, expected, encoding):
            # Run every decoding path over ``data`` and compare to ``expected``.
            decode = codecs.getdecoder("css")

            # Check stateless decoder with encoding autodetection
            self.assertEqual(decode(data)[0], expected)

            # Check stateless decoder with specified encoding
            self.assertEqual(decode(data, encoding=encoding)[0], expected)

            # NOTE(review): this probes the cssutils ``codec`` module, not the
            # stdlib ``codecs`` module, so the branch presumably never runs.
            # Confirm before changing it; the ``iterdecode`` calls on the
            # decoder instance inside look unverified.
            if hasattr(codec, "getincrementaldecoder"):
                # Check incremental decoder with encoding autodetection
                incdecoder = codecs.getincrementaldecoder("css")()
                self.assertEqual("".join(incdecoder.iterdecode(data)), expected)

                # Check incremental decoder with specified encoding; the
                # encoding is passed by keyword because the first positional
                # parameter of an incremental decoder is ``errors``.
                incdecoder = codecs.getincrementaldecoder("css")(encoding=encoding)
                self.assertEqual("".join(incdecoder.iterdecode(data)), expected)

            # Check stream reader with encoding autodetection
            self.assertEqual(streamread(data), expected)

            # Check stream reader with specified encoding
            self.assertEqual(streamread(data, encoding=encoding), expected)

        def checkauto(encoding, text=u'@charset "x";g\xfcrk\u20ac{}'):
            # The decoded text is expected to declare the encoding that was
            # used, with "utf-8-sig" reported as plain "utf-8".
            outputencoding = "utf-8" if encoding == "utf-8-sig" else encoding
            expected = text.replace('"x"', '"%s"' % outputencoding)
            checkall(text.encode(encoding), expected, encoding)

        def checkdecl(encoding, text=u'@charset "%s";g\xfcrk{}'):
            # The sheet already declares its real encoding, so decoding is
            # expected to give back exactly the original text.
            text = text % encoding
            checkall(text.encode(encoding), text, encoding)

        # Autodetectable encodings
        checkauto("utf-8-sig")
        checkauto("utf-16")
        checkauto("utf-16-le")
        checkauto("utf-16-be")
        if haveutf32:
            checkauto("utf-32")
            checkauto("utf-32-le")
            checkauto("utf-32-be")

        # Use correct declaration
        checkdecl("utf-8")
        checkdecl("iso-8859-1", u'@charset "%s";g\xfcrk')
        checkdecl("iso-8859-15")
        checkdecl("cp1252")

        # No recursion
        self.assertRaises(ValueError, u'@charset "css";div{}'.encode().decode, "css")

    def test_encoder(self):
        """codec.encoder

        Encodes the same style sheet through the stateless encoder and the
        stream writer of the "css" codec, each with and without an explicitly
        passed encoding, and decodes the result again for comparison.
        """
        def streamwrite(text, **writerargs):
            # Write ``text`` character by character through a "css" stream
            # writer into a Queue and return everything the queue collected.
            # TODO: the original author was unsure whether the text should be
            # encoded before being written here.
            q = Queue()
            writer = codecs.getwriter("css")(q, **writerargs)
            for char in text:
                writer.write(char)
            return q.read()

        def check(encoding, text=u'@charset "x";g\xfcrk\u20ac{}'):
            # "utf-8-sig" is expected to be declared as plain "utf-8".
            outputencoding = "utf-8" if encoding == "utf-8-sig" else encoding
            inputdecl = text.replace('"x"', '"%s"' % encoding)
            outputdecl = text.replace('"x"', '"%s"' % outputencoding)

            encode = codecs.getencoder("css")

            # Check stateless encoder with encoding autodetection
            self.assertEqual(encode(inputdecl)[0].decode(encoding), outputdecl)

            # Check stateless encoder with specified encoding
            self.assertEqual(encode(text, encoding=encoding)[0].decode(encoding), outputdecl)

            # NOTE(review): this probes the cssutils ``codec`` module, not the
            # stdlib ``codecs`` module, so the branch presumably never runs.
            # Confirm before changing it; the ``iterencode`` calls on the
            # encoder instance inside look unverified.
            if hasattr(codec, "getincrementalencoder"):
                # Check incremental encoder with encoding autodetection
                incencoder = codecs.getincrementalencoder("css")()
                self.assertEqual("".join(incencoder.iterencode(inputdecl)).decode(encoding), outputdecl)

                # Check incremental encoder with specified encoding
                incencoder = codecs.getincrementalencoder("css")(encoding=encoding)
                self.assertEqual("".join(incencoder.iterencode(text)).decode(encoding), outputdecl)

            # Check stream writer with encoding autodetection
            self.assertEqual(streamwrite(inputdecl).decode(encoding), outputdecl)

            # Check stream writer with specified encoding
            self.assertEqual(streamwrite(text, encoding=encoding).decode(encoding), outputdecl)

        # Autodetectable encodings
        check("utf-8-sig")
        check("utf-16")
        check("utf-16-le")
        check("utf-16-be")
        if haveutf32:
            check("utf-32")
            check("utf-32-le")
            check("utf-32-be")
        check("utf-8")
        check("iso-8859-1", u'@charset "x";g\xfcrk{}')
        check("iso-8859-15")
        check("cp1252")

        # No recursion
        self.assertRaises(ValueError, u'@charset "css";div{}'.encode, "css")

    def test_decode_force(self):
        """codec.decode (force)

        Runs the same inputs through the stateless decoder, an incremental
        decoder and a stream reader of the "css" codec and checks how the
        ``encoding`` and ``force`` keyword arguments affect the result.
        """
        info = codecs.lookup("css")

        def decodeall(data, **kwargs):
            # Index 1 of the looked-up codec info is the stateless decode
            # function (the original comment mentions Python 2.5 here).
            return info[1](data, **kwargs)[0]

        def incdecode(data, **kwargs):
            decoder = info.incrementaldecoder(**kwargs)
            return decoder.decode(data)

        def streamdecode(data, **kwargs):
            # ``iostream`` is the module-level in-memory stream class.
            stream = iostream(data)
            reader = info.streamreader(stream, **kwargs)
            return reader.read()

        declared = u'@charset "utf-8"; \xff'.encode('utf-8')
        cases = (
            # (encoded input, keyword arguments, expected text)
            (declared, {}, u'@charset "utf-8"; \xff'),
            (declared, dict(encoding="iso-8859-1", force=True),
             u'@charset "iso-8859-1"; \xc3\xbf'),
            (u'\xff'.encode('utf-8'), dict(encoding="iso-8859-1", force=True),
             u'\xc3\xbf'),
            (declared, dict(encoding="iso-8859-1", force=False),
             u'@charset "utf-8"; \xff'),
        )

        for decode in (decodeall, incdecode, streamdecode):
            for data, options, expected in cases:
                self.assertEqual(decode(data, **options), expected)
338

    
339

    
340
if __name__ == '__main__':
    # Run the test cases of this module when it is executed as a script;
    # unittest is already imported at the top of the module.
    unittest.main()