Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / cssutils / tests / test_encutils / __init__.py @ 475

History | View | Annotate | Download (16.7 KB)

1
# -*- coding: utf-8 -*-
2
"""
3
tests for encutils.py
4
"""
5
import httplib
6
from StringIO import StringIO
7
import sys
8
import unittest
9

    
10
# True when the interpreter is Python 2.x; _fakeRes uses it to decide how
# the fake HTTP message object is constructed.
PY2x = sys.version_info[0] < 3

# Use the encutils module shipped inside cssutils when it is importable,
# otherwise fall back to a top-level ``encutils`` module.
try:
    import cssutils.encutils as encutils
except ImportError:
    import encutils

# helper log: messages go to an in-memory stream that is never inspected
log = encutils.buildlog(stream=StringIO())
19

    
20
class AutoEncodingTestCase(unittest.TestCase):
21

    
22
    def _fakeRes(self, content):
        """Return a minimal stand-in for an HTTP response.

        The returned object provides ``info()``, which yields an
        ``httplib.HTTPMessage`` built from ``content`` (a raw header
        line), and ``read()``, which hands ``content`` back unchanged.
        """
        class FakeRes:
            def __init__(self, content):
                if not PY2x:
                    self._info = httplib.HTTPMessage()
                    # Adjust to testdata: whatever follows the last
                    # colon of the header line is used as the type.
                    pieces = content.split(':')
                    if len(pieces) > 1:
                        self._info.set_type(pieces[-1].strip())
                    return
                # Python 2: let HTTPMessage parse the header text itself.
                self._info = httplib.HTTPMessage(StringIO(content))

            def info(self):
                return self._info

            def read(self):
                # Reads the argument of the enclosing _fakeRes call.
                return content

        return FakeRes(content)
46

    
47
    def test_getTextTypeByMediaType(self):
        """encutils._getTextTypeByMediaType

        Each media type string must be mapped to the listed text-type
        constant of ``encutils``.
        """
        expected_by_media_type = {
            'application/xml': encutils._XML_APPLICATION_TYPE,
            'application/xml-dtd': encutils._XML_APPLICATION_TYPE,
            'application/xml-external-parsed-entity': encutils._XML_APPLICATION_TYPE,
            'application/xhtml+xml': encutils._XML_APPLICATION_TYPE,
            'text/xml': encutils._XML_TEXT_TYPE,
            'text/xml-external-parsed-entity': encutils._XML_TEXT_TYPE,
            'text/xhtml+xml': encutils._XML_TEXT_TYPE,
            'text/html': encutils._HTML_TEXT_TYPE,
            'text/css': encutils._TEXT_UTF8,
            'text/plain': encutils._TEXT_TYPE,
            'x/x': encutils._OTHER_TYPE,
            'ANYTHING': encutils._OTHER_TYPE
            }
        for media_type, text_type in expected_by_media_type.items():
            actual = encutils._getTextTypeByMediaType(media_type, log=log)
            self.assertEqual(text_type, actual)
66

    
67
    def test_getTextType(self):
        """encutils._getTextType

        Text starting with an XML declaration (with or without leading
        BOM-like characters) is expected to be classified as the XML
        application type; every other sample as the "other" type.
        """
        expected_by_text = {
            u'\x00\x00\xFE\xFF<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xFF\xFE\x00\x00<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xFE\xFF<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xFF\xFE<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\xef\xbb\xbf<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'<?xml version="1.0"': encutils._XML_APPLICATION_TYPE,
            u'\x00\x00\xFE\xFFanything': encutils._OTHER_TYPE,
            u'\xFF\xFE\x00\x00anything': encutils._OTHER_TYPE,
            u'\xFE\xFFanything': encutils._OTHER_TYPE,
            u'\xFF\xFEanything': encutils._OTHER_TYPE,
            u'\xef\xbb\xbfanything': encutils._OTHER_TYPE,
            u'x/x': encutils._OTHER_TYPE,
            u'ANYTHING': encutils._OTHER_TYPE
            }
        for text, text_type in expected_by_text.items():
            actual = encutils._getTextType(text, log=log)
            self.assertEqual(text_type, actual)
87

    
88
    def test_encodingByMediaType(self):
        """encutils.encodingByMediaType

        Checks the default encoding reported for each media type; a
        value of ``None`` means no encoding is expected.
        """
        default_encodings = {
            'application/xml': 'utf-8',
            'application/xml-dtd': 'utf-8',
            'application/xml-external-parsed-entity': 'utf-8',
            'application/ANYTHING+xml': 'utf-8',
            '  application/xml  ': 'utf-8',
            'text/xml': 'ascii',
            'text/xml-external-parsed-entity': 'ascii',
            'text/ANYTHING+xml': 'ascii',
            'text/html': 'iso-8859-1',
            'text/css': 'utf-8',
            'text/plain': 'iso-8859-1',
            'ANYTHING': None
            }
        for media_type, encoding in default_encodings.items():
            actual = encutils.encodingByMediaType(media_type, log=log)
            self.assertEqual(encoding, actual)
107

    
108
    def test_getMetaInfo(self):
        """encutils.getMetaInfo

        Maps HTML snippets to the ``(media_type, encoding)`` pair that
        ``encutils.getMetaInfo`` is expected to extract from their
        ``<meta http-equiv="Content-Type">`` element.
        """
        expected_by_markup = {
            # misspelled attribute name or header name: nothing is found
            """<meta tp-equiv='Content-Type' content='text/html; charset=ascii'>""":
                (None, None),
            """<meta http-equiv='ontent-Type' content='text/html; charset=ascii'>""":
                (None, None),

            """<meta http-equiv='Content-Type' content='text/html'>""":
                ('text/html', None),

            """<meta content='text/html' http-equiv='Content-Type'>""":
                ('text/html', None),
            """<meta content='text/html;charset=ascii' http-equiv='Content-Type'>""":
                ('text/html', 'ascii'),

            """<meta http-equiv='Content-Type' content='text/html ;charset=ascii'>""":
                ('text/html', 'ascii'),
            """<meta content='text/html;charset=iso-8859-1' http-equiv='Content-Type'>""":
                ('text/html', 'iso-8859-1'),
            """<meta http-equiv="Content-Type" content="text/html;charset = ascii">""":
                ('text/html', 'ascii'),

            # additional parameters around charset
            """<meta http-equiv="Content-Type" content="text/html;charset=ascii;x=2">""":
                ('text/html', 'ascii'),
            """<meta http-equiv="Content-Type" content="text/html;x=2;charset=ascii">""":
                ('text/html', 'ascii'),
            """<meta http-equiv="Content-Type" content="text/html;x=2;charset=ascii;y=2">""":
                ('text/html', 'ascii'),

            # quoting and whitespace variations
            """<meta http-equiv='Content-Type' content="text/html;charset=ascii">""":
                ('text/html', 'ascii'),
            """<meta http-equiv='Content-Type' content='text/html;charset=ascii'  />""":
                ('text/html', 'ascii'),
            """<meta http-equiv = " Content-Type" content = " text/html;charset=ascii " >""":
                ('text/html', 'ascii'),
            """<meta http-equiv = " \n Content-Type " content = "  \t text/html   ;  charset=ascii " >""":
                ('text/html', 'ascii'),

            """<meta content="text/html;charset=ascii" http-equiv="Content-Type">""":
                ('text/html', 'ascii'),
            """<meta content="text/html;charset=ascii" http-equiv="cONTENT-type">""":
                ('text/html', 'ascii'),
            """raises exception: </ >""":
                (None, None),
            """<meta content="text/html;charset=ascii" http-equiv="cONTENT-type">
                </ >""":
                ('text/html', 'ascii'),
            """</ >
                <meta content="text/html;charset=ascii" http-equiv="cONTENT-type">""":
                ('text/html', 'ascii'),
            # NOTE(review): the original comment here reads "py 2.7.3 fixed
            # HTMLParser so:  (None, None)", yet the expectation below is
            # ('text/html', None) -- confirm which one applies.
            """<meta content="text/html" http-equiv="cONTENT-type">
                </ >
                <meta content="text/html;charset=ascii" http-equiv="cONTENT-type">""":
                ('text/html', None)
            }
        for markup, meta_info in expected_by_markup.items():
            actual = encutils.getMetaInfo(markup, log=log)
            self.assertEqual(meta_info, actual)
167

    
168
    def test_detectXMLEncoding(self):
        """encutils.detectXMLEncoding

        Each case pairs the expected encoding name with the text handed
        to ``encutils.detectXMLEncoding``.

        The cases are kept in a sequence of pairs rather than a dict
        keyed by the expected encoding: several inputs share the same
        expected value ('ascii', 'utf-8'), and as dict keys the later
        entries would replace the earlier ones so that those inputs
        would never be checked.
        """
        cases = (
            # BOM
            ('utf_32_be', u'\x00\x00\xFE\xFFanything'),
            ('utf_32_le', u'\xFF\xFE\x00\x00anything'),
            ('utf_16_be', u'\xFE\xFFanything'),
            ('utf_16_le', u'\xFF\xFEanything'),
            ('utf-8', u'\xef\xbb\xbfanything'),
            # encoding=
            ('ascii', '<?xml version="1.0" encoding="ascii" ?>'),
            ('ascii', "<?xml version='1.0' encoding='ascii' ?>"),
            ('iso-8859-1', "<?xml version='1.0' encoding='iso-8859-1' ?>"),
            # default
            ('utf-8', '<?xml version="1.0" ?>'),
            ('utf-8', '<?xml version="1.0"?><x encoding="ascii"/>'),
            )
        for exp, test in cases:
            self.assertEqual(exp, encutils.detectXMLEncoding(test, log=log))
187

    
188
    def test_tryEncodings(self):
        """encutils.tryEncodings

        The set of cases depends on whether the optional ``chardet``
        module can be imported; a different table of
        ``(expected_encoding, encoded_bytes)`` pairs is used in each
        situation.
        """
        try:
            import chardet
        except ImportError:
            # chardet is not importable
            cases = [
                ('ascii', u'abc'.encode('ascii')),
                ('windows-1252', u''.encode('windows-1252')),
                ('iso-8859-1', u'äöüß'.encode('iso-8859-1')),
                ('iso-8859-1', u'äöüß'.encode('windows-1252')),
                #('utf-8', u'\u1111'.encode('utf-8'))
                ]
        else:
            # chardet is importable
            cases = [
                ('ascii', u'abc'.encode('ascii')),
                ('windows-1252', u''.encode('windows-1252')),
                ('ascii', u'1'.encode('utf-8'))
                ]
        for expected, raw in cases:
            detected = encutils.tryEncodings(raw)
            self.assertEqual(expected, detected)
207

    
208

    
209
    def test_getEncodingInfo(self):
        """encutils.getEncodingInfo

        Every case has the form
        ``((expected_encoding, expected_mismatch), (http_header, text))``.
        When ``http_header`` is truthy, a fake response built from it is
        passed together with ``text``; otherwise only ``text`` is passed.
        The ``encoding`` and ``mismatch`` attributes of the result are
        compared with the expected pair.
        """
        # (expectedencoding, expectedmismatch): (httpheader, filecontent)
        cases = [

            # --- application/xhtml+xml ---

            # header default and XML default
            (('utf-8', False), (
                '''Content-Type: application/xhtml+xml''',
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml"/>
                    </example>''')),
            # XML default
            (('utf-8', False), (
                None,
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml"/>
                    </example>''')),
            # meta is ignored!
            (('utf-8', False), (
                '''Content-Type: application/xhtml+xml''',
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml;charset=iso_M"/>
                    </example>''')),

            # header enc and XML default
            (('iso-h', True), (
                '''Content-Type: application/xhtml+xml;charset=iso-H''',
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml"/>
                    </example>''')),

            # mismatch header and XML explicit, header wins
            (('iso-h', True), (
                '''Content-Type: application/xhtml+xml;charset=iso-H''',
                '''<?xml version="1.0" encoding="iso-X" ?>
                    <example/>''')),

            # header == XML, meta ignored!
            (('iso-h', False), (
                '''Content-Type: application/xhtml+xml;charset=iso-H''',
                '''<?xml version="1.0" encoding="iso-h" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml;charset=iso_M"/>
                    </example>''')),

            # XML only, meta ignored!
            (('iso-x', False), (
                '''Content-Type: application/xhtml+xml''',
                '''<?xml version="1.0" encoding="iso-X" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="application/xhtml+xml;charset=iso_M"/>
                    </example>''')),


            # no text or not enough text:
            (('iso-h', False), ('Content-Type: application/xml;charset=iso-h',
                             '1')),
            (('utf-8', False), ('Content-Type: application/xml',
                                None)),
            ((None, False), ('Content-Type: application/xml',
                             '1')),


            # --- text/xml ---

            # default enc
            (('ascii', False), (
                '''Content-Type: text/xml''',
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="text/xml"/>
                    </example>''')),
            # default as XML ignored and meta completely ignored
            (('ascii', False), (
                '''Content-Type: text/xml''',
                '''<?xml version="1.0" encoding="iso-X" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="text/xml;charset=iso_M"/>
                    </example>''')),
            (('ascii', False), ('Content-Type: text/xml',
                                '1')),
            (('ascii', False), ('Content-Type: text/xml',
                                None)),

            # header enc
            (('iso-h', False), (
                '''Content-Type: text/xml;charset=iso-H''',
                '''<?xml version="1.0" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="text/xml"/>
                    </example>''')),

            # header only, XML and meta ignored!
            (('iso-h', False), (
                '''Content-Type: text/xml;charset=iso-H''',
                '''<?xml version="1.0" encoding="iso-X" ?>
                    <example/>''')),
            (('iso-h', False), (
                '''Content-Type: text/xml;charset=iso-H''',
                '''<?xml version="1.0"  encoding="iso-h" ?>
                    <example>
                        <meta http-equiv="Content-Type"
                            content="text/xml;charset=iso_M"/>
                    </example>''')),


            # --- text/html ---

            # default enc
            (('iso-8859-1', False), ('Content-Type: text/html;',
                                     '''<meta http-equiv="Content-Type"
                                        content="text/html">''')),
            (('iso-8859-1', False), ('Content-Type: text/html;',
                                     None)),

            # header enc
            (('iso-h', False), ('Content-Type: text/html;charset=iso-H',
                                '''<meta http-equiv="Content-Type"
                                    content="text/html">''')),
            # meta enc
            (('iso-m', False), ('Content-Type: text/html',
                                '''<meta http-equiv="Content-Type"
                                    content="text/html;charset=iso-m">''')),

            # mismatch header and meta, header wins
            (('iso-h', True), ('Content-Type: text/html;charset=iso-H',
                               '''<meta http-equiv="Content-Type"
                                    content="text/html;charset=iso-m">''')),

            # no header:
            ((None, False), (None,
                             '''<meta http-equiv="Content-Type"
                                content="text/html;charset=iso-m">''')),
            # no encoding at all
            ((None, False), (None,
                             '''<meta http-equiv="Content-Type"
                                content="text/html">''')),


            ((None, False), (None,
                             '''text''')),


            # --- no header ---

            ((None, False), (None, '')),
            (('iso-8859-1', False), ('''NoContentType''',
                                     '''OnlyText''')),
            (('iso-8859-1', False), ('Content-Type: text/html;',
                                     None)),
            (('iso-8859-1', False), ('Content-Type: text/html;',
                                     '1')),

            # XML
            (('utf-8', False), (None,
                                '''<?xml version=''')),
            (('iso-x', False), (None,
                                '''<?xml version="1.0" encoding="iso-X"?>''')),
            # meta ignored
            (('utf-8', False), (None,
                                '''<?xml version="1.0" ?>
                                    <html><meta http-equiv="Content-Type"
                                    content="text/html;charset=iso-m"></html>''')),

            (('utf-8', False), ('Content-Type: text/css;',
                                '1')),
            (('iso-h', False), ('Content-Type: text/css;charset=iso-h',
                                '1')),
            # only header is used by encutils
            (('utf-8', False), ('Content-Type: text/css',
                                '@charset "ascii";')),

        ]
        for expected, (header, text) in cases:
            if not header:
                # No (or empty) header: detection works on the text alone.
                info = encutils.getEncodingInfo(text=text)
            else:
                info = encutils.getEncodingInfo(self._fakeRes(header), text)

            self.assertEqual(expected, (info.encoding, info.mismatch))
406

    
407

    
408
if __name__ == '__main__':
409
    unittest.main()