gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / simplejson / tests / test_scanstring.py @ 545
History | View | Annotate | Download (7.14 KB)
1 |
import sys |
---|---|
2 |
from unittest import TestCase |
3 |
|
4 |
import simplejson as json |
5 |
import simplejson.decoder |
6 |
from simplejson.compat import b, PY3 |
7 |
|
8 |
class TestScanString(TestCase):
    """Exercise the pure-Python and C ``scanstring`` implementations."""

    # The bytes type is intentionally not used in most of these tests
    # under Python 3 because the decoder immediately coerces to str before
    # calling scanstring. In Python 2 we are testing the code paths
    # for both unicode and str.
    #
    # The reason this is done is because Python 3 would require
    # entirely different code paths for parsing bytes and str.
    #
    # NOTE: the test_* methods are documented with comments rather than
    # docstrings so that verbose test output keeps showing method names.

    def test_py_scanstring(self):
        # Run the shared checks against the pure-Python scanner.
        self._test_scanstring(simplejson.decoder.py_scanstring)

    def test_c_scanstring(self):
        # Run the shared checks against the C scanner, when there is one.
        if not simplejson.decoder.c_scanstring:
            # Report a skip instead of a misleading pass.  skipTest does not
            # exist on very old unittest versions, so fall back to the
            # original silent return there.
            skip = getattr(self, 'skipTest', None)
            if skip is not None:
                skip('simplejson C scanstring is not available')
            return
        self._test_scanstring(simplejson.decoder.c_scanstring)

    def _test_scanstring(self, scanstring):
        """Check ``scanstring`` against a fixed set of inputs.

        ``scanstring`` is always called here as
        ``scanstring(text, start, None, strict)`` and is expected to return
        a ``(decoded_text, end_index)`` tuple, or to raise ``ValueError`` on
        malformed input.  The third argument is always ``None`` in these
        tests (presumably the encoding -- confirm against the decoder).
        """
        # A character outside the BMP takes two code units when
        # sys.maxunicode is 65535, which moves the reported end index by one.
        if sys.maxunicode == 65535:
            astral_end = 6
        else:
            astral_end = 5
        self.assertEqual(
            scanstring(u'"z\U0001d120x"', 1, None, True),
            (u'z\U0001d120x', astral_end))

        # (text, index at which scanning starts, expected result)
        cases = [
            ('"\\u007b"', 1, (u'{', 8)),
            ('"A JSON payload should be an object or array, not a string."',
             1,
             (u'A JSON payload should be an object or array, not a string.',
              60)),
            ('["Unclosed array"', 2, (u'Unclosed array', 17)),
            ('["extra comma",]', 2, (u'extra comma', 14)),
            ('["double extra comma",,]', 2, (u'double extra comma', 21)),
            ('["Comma after the close"],', 2,
             (u'Comma after the close', 24)),
            ('["Extra close"]]', 2, (u'Extra close', 14)),
            ('{"Extra comma": true,}', 2, (u'Extra comma', 14)),
            ('{"Extra value after close": true} "misplaced quoted value"',
             2,
             (u'Extra value after close', 26)),
            ('{"Illegal expression": 1 + 2}', 2,
             (u'Illegal expression', 21)),
            ('{"Illegal invocation": alert()}', 2,
             (u'Illegal invocation', 21)),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             (u'Numbers cannot have leading zeroes', 37)),
            ('{"Numbers cannot be hex": 0x14}', 2,
             (u'Numbers cannot be hex', 24)),
            ('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21,
             (u'Too deep', 30)),
            ('{"Missing colon" null}', 2, (u'Missing colon', 16)),
            ('{"Double colon":: null}', 2, (u'Double colon', 15)),
            ('{"Comma instead of colon", null}', 2,
             (u'Comma instead of colon', 25)),
            ('["Colon instead of comma": false]', 2,
             (u'Colon instead of comma', 25)),
            ('["Bad value", truth]', 2, (u'Bad value', 12)),
        ]
        for text, start, expected in cases:
            self.assertEqual(scanstring(text, start, None, True), expected)

        # Raw control characters are U+0000 through U+001F inclusive, so the
        # range must stop at 0x20 (the original stopped at 0x1f and never
        # exercised U+001F).  They are accepted verbatim when strict is
        # False and rejected when it is True.
        for c in map(chr, range(0x00, 0x20)):
            self.assertEqual(
                scanstring(c + '"', 0, None, False),
                (c, 2))
            self.assertRaises(
                ValueError,
                scanstring, c + '"', 0, None, True)

        # Unterminated strings and truncated escape sequences.
        malformed = ['', 'a', '\\', '\\u', '\\u0', '\\u01', '\\u012',
                     '\\u0123']
        for text in malformed:
            self.assertRaises(ValueError, scanstring, text, 0, None, True)

        # Only checked when sys.maxunicode is above 65535: a \ud834 escape
        # followed by a truncated or invalid escape.
        if sys.maxunicode > 65535:
            self.assertRaises(ValueError,
                              scanstring, '\\ud834\\u"', 0, None, True)
            self.assertRaises(ValueError,
                              scanstring, '\\ud834\\x0123"', 0, None, True)

    def test_issue3623(self):
        # Bad argument combinations must raise instead of misbehaving.
        self.assertRaises(ValueError, json.decoder.scanstring, "xxx", 1,
                          "xxx")
        self.assertRaises(UnicodeDecodeError,
                          json.encoder.encode_basestring_ascii, b("xx\xff"))

    def test_overflow(self):
        # Python 2.5 does not have maxsize, Python 3 does not have maxint
        maxsize = getattr(sys, 'maxsize', getattr(sys, 'maxint', None))
        # Use a TestCase assertion: a bare assert is stripped under -O.
        self.assertTrue(maxsize is not None)
        self.assertRaises(OverflowError, json.decoder.scanstring, "xxx",
                          maxsize + 1)

    def test_surrogates(self):
        # Surrogate handling, both as \uXXXX escapes and as raw characters.
        scanstring = json.decoder.scanstring

        def assertScan(given, expect, test_utf8=True):
            """Scan ``given`` from index 1 and compare with ``expect``.

            The whole input must be consumed.  Outside Python 3 the UTF-8
            encoded form of ``given`` is scanned as well, unless
            ``test_utf8`` is false.
            """
            candidates = [given]
            if not PY3 and test_utf8:
                candidates.append(given.encode('utf8'))
            for candidate in candidates:
                (res, count) = scanstring(candidate, 1, None, True)
                self.assertEqual(len(candidate), count)
                self.assertEqual(res, expect)

        assertScan(
            u'"z\\ud834\\u0079x"',
            u'z\ud834yx')
        assertScan(
            u'"z\\ud834\\udd20x"',
            u'z\U0001d120x')
        assertScan(
            u'"z\\ud834\\ud834\\udd20x"',
            u'z\ud834\U0001d120x')
        assertScan(
            u'"z\\ud834x"',
            u'z\ud834x')
        assertScan(
            u'"z\\udd20x"',
            u'z\udd20x')
        assertScan(
            u'"z\ud834x"',
            u'z\ud834x')
        # It may look strange to join strings together, but Python is drunk.
        # https://gist.github.com/etrepum/5538443
        assertScan(
            u'"z\\ud834\udd20x12345"',
            u''.join([u'z\ud834', u'\udd20x12345']))
        assertScan(
            u'"z\ud834\\udd20x"',
            u''.join([u'z\ud834', u'\udd20x']))
        # these have different behavior given UTF8 input, because the surrogate
        # pair may be joined (in maxunicode > 65535 builds)
        assertScan(
            u''.join([u'"z\ud834', u'\udd20x"']),
            u''.join([u'z\ud834', u'\udd20x']),
            test_utf8=False)

        # Escapes with a non-hex digit must be rejected.
        self.assertRaises(ValueError,
                          scanstring, u'"z\\ud83x"', 1, None, True)
        self.assertRaises(ValueError,
                          scanstring, u'"z\\ud834\\udd2x"', 1, None, True)