diff options
Diffstat (limited to 'django/utils/simplejson/encoder.py')
| -rw-r--r-- | django/utils/simplejson/encoder.py | 375 |
1 files changed, 210 insertions, 165 deletions
diff --git a/django/utils/simplejson/encoder.py b/django/utils/simplejson/encoder.py index e6c01f6138..06ebe62a3c 100644 --- a/django/utils/simplejson/encoder.py +++ b/django/utils/simplejson/encoder.py @@ -1,12 +1,9 @@ -""" -Implementation of JSONEncoder +"""Implementation of JSONEncoder """ import re -try: - from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - pass +c_encode_basestring_ascii = None +c_make_encoder = None ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') @@ -27,29 +24,9 @@ for i in range(0x20): INFINITY = float('1e66666') FLOAT_REPR = repr -def floatstr(o, allow_nan=True): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. - - if o != o: - text = 'NaN' - elif o == INFINITY: - text = 'Infinity' - elif o == -INFINITY: - text = '-Infinity' - else: - return FLOAT_REPR(o) - - if not allow_nan: - raise ValueError("Out of range float values are not JSON compliant: %r" - % (o,)) - - return text - - def encode_basestring(s): - """ - Return a JSON representation of a Python string + """Return a JSON representation of a Python string + """ def replace(match): return ESCAPE_DCT[match.group(0)] @@ -57,6 +34,9 @@ def encode_basestring(s): def py_encode_basestring_ascii(s): + """Return an ASCII-only JSON representation of a Python string + + """ if isinstance(s, str) and HAS_UTF8.search(s) is not None: s = s.decode('utf-8') def replace(match): @@ -76,18 +56,13 @@ def py_encode_basestring_ascii(s): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -try: - encode_basestring_ascii = c_encode_basestring_ascii -except NameError: - encode_basestring_ascii = py_encode_basestring_ascii - +encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii class JSONEncoder(object): - """ - Extensible JSON <http://json.org> encoder for Python data structures. + """Extensible JSON <http://json.org> encoder for Python data structures. Supports the following objects and types by default: - + +-------------------+---------------+ | Python | JSON | +===================+===============+ @@ -110,15 +85,14 @@ class JSONEncoder(object): ``.default()`` method with another method that returns a serializable object for ``o`` if possible, otherwise it should call the superclass implementation (to raise ``TypeError``). + """ - __all__ = ['__init__', 'default', 'encode', 'iterencode'] item_separator = ', ' key_separator = ': ' def __init__(self, skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, encoding='utf-8', default=None): - """ - Constructor for JSONEncoder, with sensible defaults. + """Constructor for JSONEncoder, with sensible defaults. If skipkeys is False, then it is a TypeError to attempt encoding of keys that are not str, int, long, float or None. If @@ -158,6 +132,7 @@ class JSONEncoder(object): If encoding is not None, then all input strings will be transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + """ self.skipkeys = skipkeys @@ -166,17 +141,132 @@ class JSONEncoder(object): self.allow_nan = allow_nan self.sort_keys = sort_keys self.indent = indent - self.current_indent_level = 0 if separators is not None: self.item_separator, self.key_separator = separators if default is not None: self.default = default self.encoding = encoding - def _newline_indent(self): - return '\n' + (' ' * (self.indent * self.current_indent_level)) + def default(self, o): + """Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + + """ + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + """Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo": ["bar", "baz"]}' + + """ + # This is for extremely simple cases and benchmarks. + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8')): + o = o.decode(_encoding) + if self.ensure_ascii: + return encode_basestring_ascii(o) + else: + return encode_basestring(o) + # This doesn't pass the iterator directly to ''.join() because the + # exceptions aren't as detailed. The list call should be roughly + # equivalent to the PySequence_Fast that ''.join() would do. + chunks = self.iterencode(o, _one_shot=True) + if not isinstance(chunks, (list, tuple)): + chunks = list(chunks) + return ''.join(chunks) + + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + if self.encoding != 'utf-8': + def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): + if isinstance(o, str): + o = o.decode(_encoding) + return _orig_encoder(o) + + def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + + return text + + + if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + _iterencode = c_make_encoder( + markers, self.default, _encoder, self.indent, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, self.allow_nan) + else: + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + False=False, + True=True, + ValueError=ValueError, + basestring=basestring, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + long=long, + str=str, + tuple=tuple, + ): - def _iterencode_list(self, lst, markers=None): + def _iterencode_list(lst, _current_indent_level): if not lst: yield '[]' return @@ -185,31 +275,51 @@ class JSONEncoder(object): if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = lst - yield '[' - if self.indent is not None: - self.current_indent_level += 1 - newline_indent = self._newline_indent() - separator = self.item_separator + newline_indent - yield newline_indent + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + separator = _item_separator + newline_indent + buf += newline_indent else: newline_indent = None - separator = self.item_separator + separator = _item_separator first = True for value in lst: if first: first = False else: - yield separator - for chunk in self._iterencode(value, markers): - yield chunk + buf = separator + if isinstance(value, basestring): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, (int, long)): + yield buf + str(value) + elif isinstance(value, float): + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk if newline_indent is not None: - self.current_indent_level -= 1 - yield self._newline_indent() + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) yield ']' if markers is not None: del markers[markerid] - def _iterencode_dict(self, dct, markers=None): + def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' return @@ -219,40 +329,27 @@ class JSONEncoder(object): raise ValueError("Circular reference detected") markers[markerid] = dct yield '{' - key_separator = self.key_separator - if self.indent is not None: - self.current_indent_level += 1 - newline_indent = self._newline_indent() - item_separator = self.item_separator + newline_indent + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) + item_separator = _item_separator + newline_indent yield newline_indent else: newline_indent = None - item_separator = self.item_separator + item_separator = _item_separator first = True - if self.ensure_ascii: - encoder = encode_basestring_ascii - else: - encoder = encode_basestring - allow_nan = self.allow_nan - if self.sort_keys: - keys = dct.keys() - keys.sort() - items = [(k, dct[k]) for k in keys] + if _sort_keys: + items = dct.items() + items.sort(key=lambda kv: kv[0]) else: items = dct.iteritems() - _encoding = self.encoding - _do_decode = (_encoding is not None - and not (_encoding == 'utf-8')) for key, value in items: - if isinstance(key, str): - if _do_decode: - key = key.decode(_encoding) - elif isinstance(key, basestring): + if isinstance(key, basestring): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. elif isinstance(key, float): - key = floatstr(key, allow_nan) + key = _floatstr(key) elif isinstance(key, (int, long)): key = str(key) elif key is True: @@ -261,7 +358,7 @@ class JSONEncoder(object): key = 'false' elif key is None: key = 'null' - elif self.skipkeys: + elif _skipkeys: continue else: raise TypeError("key %r is not a string" % (key,)) @@ -269,28 +366,39 @@ class JSONEncoder(object): first = False else: yield item_separator - yield encoder(key) - yield key_separator - for chunk in self._iterencode(value, markers): - yield chunk + yield _encoder(key) + yield _key_separator + if isinstance(value, basestring): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, (int, long)): + yield str(value) + elif isinstance(value, float): + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + for chunk in chunks: + yield chunk if newline_indent is not None: - self.current_indent_level -= 1 - yield self._newline_indent() + _current_indent_level -= 1 + yield '\n' + (' ' * (_indent * _current_indent_level)) yield '}' if markers is not None: del markers[markerid] - def _iterencode(self, o, markers=None): + def _iterencode(o, _current_indent_level): if isinstance(o, basestring): - if self.ensure_ascii: - encoder = encode_basestring_ascii - else: - encoder = encode_basestring - _encoding = self.encoding - if (_encoding is not None and isinstance(o, str) - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - yield encoder(o) + yield _encoder(o) elif o is None: yield 'null' elif o is True: @@ -300,12 +408,12 @@ class JSONEncoder(object): elif isinstance(o, (int, long)): yield str(o) elif isinstance(o, float): - yield floatstr(o, self.allow_nan) + yield _floatstr(o) elif isinstance(o, (list, tuple)): - for chunk in self._iterencode_list(o, markers): + for chunk in _iterencode_list(o, _current_indent_level): yield chunk elif isinstance(o, dict): - for chunk in self._iterencode_dict(o, markers): + for chunk in _iterencode_dict(o, _current_indent_level): yield chunk else: if markers is not None: @@ -313,73 +421,10 @@ class JSONEncoder(object): if markerid in markers: raise ValueError("Circular reference detected") markers[markerid] = o - for chunk in self._iterencode_default(o, markers): + o = _default(o) + for chunk in _iterencode(o, _current_indent_level): yield chunk if markers is not None: del markers[markerid] - def _iterencode_default(self, o, markers=None): - newobj = self.default(o) - return self._iterencode(newobj, markers) - - def default(self, o): - """ - Implement this method in a subclass such that it returns - a serializable object for ``o``, or calls the base implementation - (to raise a ``TypeError``). - - For example, to support arbitrary iterators, you could - implement default like this:: - - def default(self, o): - try: - iterable = iter(o) - except TypeError: - pass - else: - return list(iterable) - return JSONEncoder.default(self, o) - """ - raise TypeError("%r is not JSON serializable" % (o,)) - - def encode(self, o): - """ - Return a JSON string representation of a Python data structure. - - >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) - '{"foo": ["bar", "baz"]}' - """ - # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) - if self.ensure_ascii: - return encode_basestring_ascii(o) - else: - return encode_basestring(o) - # This doesn't pass the iterator directly to ''.join() because the - # exceptions aren't as detailed. The list call should be roughly - # equivalent to the PySequence_Fast that ''.join() would do. - chunks = list(self.iterencode(o)) - return ''.join(chunks) - - def iterencode(self, o): - """ - Encode the given object and yield each string - representation as available. - - For example:: - - for chunk in JSONEncoder().iterencode(bigobject): - mysocket.write(chunk) - """ - if self.check_circular: - markers = {} - else: - markers = None - return self._iterencode(o, markers) - -__all__ = ['JSONEncoder'] + return _iterencode |
