Statistics
| Revision:

gvsig-scripting / org.gvsig.scripting / trunk / org.gvsig.scripting / org.gvsig.scripting.app / org.gvsig.scripting.app.mainplugin / src / main / resources-plugin / scripting / lib / simplejson / encoder.py @ 545

History | View | Annotate | Download (24.7 KB)

"""Implementation of JSONEncoder
"""
3
from __future__ import absolute_import
4
import re
5
from operator import itemgetter
6
from decimal import Decimal
7
from .compat import u, unichr, binary_type, string_types, integer_types, PY3
8
def _import_speedups():
9
    try:
10
        from . import _speedups
11
        return _speedups.encode_basestring_ascii, _speedups.make_encoder
12
    except ImportError:
13
        return None, None
14
c_encode_basestring_ascii, c_make_encoder = _import_speedups()
15

    
16
from simplejson.decoder import PosInf
17

    
18
#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
# This is required because u() will mangle the string and ur'' isn't valid
# python3 syntax
# Characters that encode_basestring() replaces: control characters,
# backslash, double quote and the U+2028 / U+2029 separators.
ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
# Characters that py_encode_basestring_ascii() replaces: backslash, double
# quote and anything outside the printable ASCII range ' ' .. '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes in the 0x80-0xff range; the string encoders use it to decide
# whether a Python 2 byte string has to be decoded first.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are listed
# explicitly; the loops below add \uXXXX forms for everything else.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# setdefault() leaves the short escapes above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
for i in [0x2028, 0x2029]:
    ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))

# Callable used to render ordinary (finite) floats.
FLOAT_REPR = repr
40

    
41
def encode_basestring(s, _PY3=PY3, _q=u('"')):
    """Return a JSON representation of a Python string

    On Python 3 a byte string is decoded as UTF-8 first; on Python 2 a
    ``str`` is decoded as UTF-8 only when it contains bytes in the
    0x80-0xff range.  Every character matched by ``ESCAPE`` is replaced by
    its ``ESCAPE_DCT`` entry and the result is wrapped in double quotes.

    ``_PY3`` and ``_q`` are private defaults bound at definition time.
    """
    if _PY3:
        if isinstance(s, binary_type):
            s = s.decode('utf-8')
    else:
        if isinstance(s, str) and HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')

    def replace(match):
        # ESCAPE only matches characters that have an ESCAPE_DCT entry.
        return ESCAPE_DCT[match.group(0)]

    return _q + ESCAPE.sub(replace, s) + _q
54

    
55

    
56
def py_encode_basestring_ascii(s, _PY3=PY3):
    """Return an ASCII-only JSON representation of a Python string

    Input decoding follows the same rules as ``encode_basestring``.  Every
    character matched by ``ESCAPE_ASCII`` is replaced either by its
    ``ESCAPE_DCT`` entry or by a ``\\uXXXX`` escape; code points of
    0x10000 and above are written as a UTF-16 surrogate pair.

    ``_PY3`` is a private default bound at definition time.
    """
    if _PY3:
        if isinstance(s, binary_type):
            s = s.decode('utf-8')
    else:
        if isinstance(s, str) and HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')

    def replace(match):
        ch = match.group(0)
        try:
            return ESCAPE_DCT[ch]
        except KeyError:
            n = ord(ch)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
83

    
84

    
85
# Use the C implementation when it is available (not None), otherwise the
# pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)
87

    
88
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # *separators* or *indent* is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8', default=None,
                 use_decimal=True, namedtuple_as_object=True,
                 tuple_as_array=True, bigint_as_string=False,
                 item_sort_key=None, for_json=False, ignore_nan=False,
                 int_as_string_bitcount=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most
        compact representation without any newlines. For backwards
        compatibility with versions of simplejson earlier than 2.1.0, an
        integer is also accepted and is converted to a string with that many
        spaces.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or -2**53 and lower will be encoded as strings. This is to avoid the
        rounding that happens in Javascript otherwise.

        If int_as_string_bitcount is a positive number (n), then int of size
        greater than or equal to 2**n or lower than or equal to -2**n will be
        encoded as strings.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary. This is useful if you want to sort items other than
        in alphabetical order by key.

        If for_json is true (not the default), objects with a ``for_json()``
        method will use the return value of that method for encoding as JSON
        instead of the object.

        If *ignore_nan* is true (default: ``False``), then out of range
        :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
        as ``null`` in compliance with the ECMA-262 specification. If true,
        this will override *allow_nan*.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        self.for_json = for_json
        self.ignore_nan = ignore_nan
        self.int_as_string_bitcount = int_as_string_bitcount
        # Backwards compatibility: an integer indent means "that many spaces".
        if indent is not None and not isinstance(indent, string_types):
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # No trailing space after the comma when every item is followed
            # by a newline anyway.
            self.item_separator = ','
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, binary_type):
            _encoding = self.encoding
            if (_encoding is not None and not (_encoding == 'utf-8')):
                o = o.decode(_encoding)
        if isinstance(o, string_types):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only add an explicit decoding step for a real, non-UTF-8 encoding.
        # With encoding=None the wrapper would call ``o.decode(None)`` and
        # fail; encode() above already skips decoding in that case.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, binary_type):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                if type(o) != float:
                    # A float subclass may override __repr__; convert to a
                    # plain float so the output is always a valid number.
                    o = float(o)
                return _repr(o)

            # ignore_nan takes precedence over allow_nan.
            if ignore_nan:
                text = 'null'
            elif not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        key_memo = {}
        # bigint_as_string is shorthand for a 53-bit threshold.
        int_as_string_bitcount = (
            53 if self.bigint_as_string else self.int_as_string_bitcount)
        # The C encoder is only used for one-shot, non-indented output.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                int_as_string_bitcount,
                self.item_sort_key, self.encoding, self.for_json,
                self.ignore_nan, Decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                int_as_string_bitcount,
                self.item_sort_key, self.encoding, self.for_json,
                Decimal=Decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
351

    
352

    
353
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        """Return the HTML-safe JSON string for ``o``."""
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        chunks = self.iterencode(o, True)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Yield the chunks of the base encoder with ``&``, ``<`` and ``>``
        replaced by their ``\\uXXXX`` escapes.
        """
        chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot)
        for chunk in chunks:
            chunk = chunk.replace('&', '\\u0026')
            chunk = chunk.replace('<', '\\u003c')
            chunk = chunk.replace('>', '\\u003e')
            yield chunk
378

    
379

    
380
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        _use_decimal, _namedtuple_as_object, _tuple_as_array,
        _int_as_string_bitcount, _item_sort_key,
        _encoding,_for_json,
        ## HACK: hand-optimized bytecode; turn globals into locals
        _PY3=PY3,
        ValueError=ValueError,
        string_types=string_types,
        Decimal=Decimal,
        dict=dict,
        float=float,
        id=id,
        integer_types=integer_types,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function configured by the given options.

    ``markers`` is a dict used for circular-reference detection, or ``None``
    to disable the check.  ``_default``, ``_encoder`` and ``_floatstr`` are
    callables used for unsupported objects, strings and floats respectively.
    The remaining leading arguments mirror the ``JSONEncoder`` options of the
    same name.

    Raises ``TypeError`` if ``_item_sort_key`` is truthy but not callable, or
    if ``_int_as_string_bitcount`` is neither ``None`` nor a positive integer.
    """
    if _item_sort_key and not callable(_item_sort_key):
        raise TypeError("item_sort_key must be None or callable")
    elif _sort_keys and not _item_sort_key:
        # Plain sort_keys: order the (key, value) pairs by key.
        _item_sort_key = itemgetter(0)

    # Check the type before comparing, so that a non-integer always gets this
    # message rather than a comparison error.
    if (_int_as_string_bitcount is not None and
        (not isinstance(_int_as_string_bitcount, integer_types) or
         _int_as_string_bitcount <= 0)):
        raise TypeError("int_as_string_bitcount must be a positive integer")

    def _encode_int(value):
        # Returns the JSON text for an integer, quoted when it falls outside
        # the open interval (-2**n, 2**n) for the configured bit count n.
        if type(value) not in integer_types:
            # An int subclass may override __str__; convert to a plain
            # integer so the output is always a valid number.
            value = int(value)
        # The bit count was validated above: it is None or positive.
        skip_quoting = _int_as_string_bitcount is None
        if (
            skip_quoting or
            (-1 << _int_as_string_bitcount)
            < value <
            (1 << _int_as_string_bitcount)
        ):
            return str(value)
        return '"' + str(value) + '"'

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next item: the opening
        # bracket for the first one, the separator for all others.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if (isinstance(value, string_types) or
                (_PY3 and isinstance(value, binary_type))):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, integer_types):
                yield buf + _encode_int(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield buf + str(value)
            else:
                yield buf
                for_json = _for_json and getattr(value, 'for_json', None)
                if for_json and callable(for_json):
                    chunks = _iterencode(for_json(), _current_indent_level)
                elif isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _stringify_key(key):
        # Converts a non-string dict key to its string form.  Returns None
        # when the key is unsupported and _skipkeys is set.
        if isinstance(key, string_types): # pragma: no cover
            pass
        elif isinstance(key, binary_type):
            key = key.decode(_encoding)
        elif isinstance(key, float):
            key = _floatstr(key)
        elif key is True:
            key = 'true'
        elif key is False:
            key = 'false'
        elif key is None:
            key = 'null'
        elif isinstance(key, integer_types):
            if type(key) not in integer_types:
                # Do not trust a custom __str__ on an int subclass.
                key = int(key)
            key = str(key)
        elif _use_decimal and isinstance(key, Decimal):
            key = str(key)
        elif _skipkeys:
            key = None
        else:
            raise TypeError("key " + repr(key) + " is not a string")
        return key

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (_indent * _current_indent_level)
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _item_sort_key:
            # Keys are stringified before sorting so the sort key sees the
            # same strings that end up in the output.
            items = []
            for k, v in dct.items():
                if not isinstance(k, string_types):
                    k = _stringify_key(k)
                    if k is None:
                        continue
                items.append((k, v))
            items.sort(key=_item_sort_key)
        elif _PY3:
            items = dct.items()
        else:
            items = dct.iteritems()
        for key, value in items:
            # In the sorted case the keys were already converted above.
            if not (_item_sort_key or isinstance(key, string_types)):
                key = _stringify_key(key)
                if key is None:
                    # _skipkeys must be True
                    continue
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if (isinstance(value, string_types) or
                (_PY3 and isinstance(value, binary_type))):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, integer_types):
                yield _encode_int(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            elif _use_decimal and isinstance(value, Decimal):
                yield str(value)
            else:
                for_json = _for_json and getattr(value, 'for_json', None)
                if for_json and callable(for_json):
                    chunks = _iterencode(for_json(), _current_indent_level)
                elif isinstance(value, list):
                    chunks = _iterencode_list(value, _current_indent_level)
                else:
                    _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
                    if _asdict and callable(_asdict):
                        chunks = _iterencode_dict(_asdict(),
                                                  _current_indent_level)
                    elif _tuple_as_array and isinstance(value, tuple):
                        chunks = _iterencode_list(value, _current_indent_level)
                    elif isinstance(value, dict):
                        chunks = _iterencode_dict(value, _current_indent_level)
                    else:
                        chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (_indent * _current_indent_level)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if (isinstance(o, string_types) or
            (_PY3 and isinstance(o, binary_type))):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, integer_types):
            yield _encode_int(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        else:
            for_json = _for_json and getattr(o, 'for_json', None)
            if for_json and callable(for_json):
                for chunk in _iterencode(for_json(), _current_indent_level):
                    yield chunk
            elif isinstance(o, list):
                for chunk in _iterencode_list(o, _current_indent_level):
                    yield chunk
            else:
                _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
                if _asdict and callable(_asdict):
                    for chunk in _iterencode_dict(_asdict(),
                            _current_indent_level):
                        yield chunk
                elif (_tuple_as_array and isinstance(o, tuple)):
                    for chunk in _iterencode_list(o, _current_indent_level):
                        yield chunk
                elif isinstance(o, dict):
                    for chunk in _iterencode_dict(o, _current_indent_level):
                        yield chunk
                elif _use_decimal and isinstance(o, Decimal):
                    yield str(o)
                else:
                    # Unknown type: let _default() convert it, guarding
                    # against a conversion that leads back to the same object.
                    if markers is not None:
                        markerid = id(o)
                        if markerid in markers:
                            raise ValueError("Circular reference detected")
                        markers[markerid] = o
                    o = _default(o)
                    for chunk in _iterencode(o, _current_indent_level):
                        yield chunk
                    if markers is not None:
                        del markers[markerid]

    return _iterencode