[pypy-commit] pypy faster-json: Step one - remove irrelevant clutter, mostly CPython optimizations.

Wed Oct 19 13:16:05 CEST 2011

Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: faster-json
Changeset: r48226:4c970cf4ece9
Date: 2011-10-19 13:11 +0200
http://bitbucket.org/pypy/pypy/changeset/4c970cf4ece9/

Log:	Step one - remove irrelevant clutter, mostly CPython optimizations.

diff --git a/lib-python/modified-2.7/json/encoder.py b/lib-python/modified-2.7/json/encoder.py
--- a/lib-python/modified-2.7/json/encoder.py
+++ b/lib-python/modified-2.7/json/encoder.py
@@ -2,15 +2,6 @@
 """
 import re
 
-try:
-    from _json import encode_basestring_ascii as c_encode_basestring_ascii
-except ImportError:
-    c_encode_basestring_ascii = None
-try:
-    from _json import make_encoder as c_make_encoder
-except ImportError:
-    c_make_encoder = None
-
 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 HAS_UTF8 = re.compile(r'[\x80-\xff]')
@@ -24,8 +15,8 @@
     '\t': '\\t',
 }
 for i in range(0x20):
-    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
-    #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+    #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
 
 # Assume this produces an infinity on all machines (probably not guaranteed)
 INFINITY = float('1e66666')
@@ -39,8 +30,7 @@
         return ESCAPE_DCT[match.group(0)]
     return '"' + ESCAPE.sub(replace, s) + '"'
 
-
-def py_encode_basestring_ascii(s):
+def encode_basestring_ascii(s):
     """Return an ASCII-only JSON representation of a Python string
 
     """
@@ -63,10 +53,8 @@
                 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
                 #return '\\u%04x\\u%04x' % (s1, s2)
     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
-
-
-encode_basestring_ascii = (
-    c_encode_basestring_ascii or py_encode_basestring_ascii)
+py_encode_basestring_ascii = encode_basestring_ascii
+c_encode_basestring_ascii = None
 
 class JSONEncoder(object):
     """Extensible JSON <http://json.org> encoder for Python data structures.
@@ -147,6 +135,10 @@
 
         self.skipkeys = skipkeys
         self.ensure_ascii = ensure_ascii
+        if ensure_ascii:
+            self.encoder = encode_basestring_ascii
+        else:
+            self.encoder = encode_basestring
         self.check_circular = check_circular
         self.allow_nan = allow_nan
         self.sort_keys = sort_keys
@@ -197,7 +189,7 @@
                 return encode_basestring(o)
         # This doesn't pass the iterator directly to ''.join() because the
         # exceptions aren't as detailed.  The list call should be roughly
-        # equivalent to the PySequence_Fast that ''.join() would do.
+        # equivalent to the PySequence_Fast that ''.join() would do.        
         chunks = self.iterencode(o, _one_shot=True)
         if not isinstance(chunks, (list, tuple)):
             chunks = list(chunks)
@@ -227,59 +219,30 @@
                     o = o.decode(_encoding)
                 return _orig_encoder(o)
 
-        def floatstr(o, allow_nan=self.allow_nan,
-                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
-            # Check for specials.  Note that this type of test is processor
-            # and/or platform-specific, so do tests which don't depend on the
-            # internals.
+        return self._iterencode(o, markers, 0)
 
-            if o != o:
-                text = 'NaN'
-            elif o == _inf:
-                text = 'Infinity'
-            elif o == _neginf:
-                text = '-Infinity'
-            else:
-                return _repr(o)
+    def _floatstr(self, o):
+        # Check for specials.  Note that this type of test is processor
+        # and/or platform-specific, so do tests which don't depend on the
+        # internals.
 
-            if not allow_nan:
-                raise ValueError(
-                    "Out of range float values are not JSON compliant: " +
-                    repr(o))
+        if o != o:
+            text = 'NaN'
+        elif o == INFINITY:
+            text = 'Infinity'
+        elif o == -INFINITY:
+            text = '-Infinity'
+        else:
+            return FLOAT_REPR(o)
 
-            return text
+        if not self.allow_nan:
+            raise ValueError(
+                "Out of range float values are not JSON compliant: " +
+                repr(o))
 
+        return text
 
-        if (_one_shot and c_make_encoder is not None
-                and not self.indent and not self.sort_keys):
-            _iterencode = c_make_encoder(
-                markers, self.default, _encoder, self.indent,
-                self.key_separator, self.item_separator, self.sort_keys,
-                self.skipkeys, self.allow_nan)
-        else:
-            _iterencode = _make_iterencode(
-                markers, self.default, _encoder, self.indent, floatstr,
-                self.key_separator, self.item_separator, self.sort_keys,
-                self.skipkeys, _one_shot)
-        return _iterencode(o, 0)
-
-def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
-        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
-        ## HACK: hand-optimized bytecode; turn globals into locals
-        ValueError=ValueError,
-        basestring=basestring,
-        dict=dict,
-        float=float,
-        id=id,
-        int=int,
-        isinstance=isinstance,
-        list=list,
-        long=long,
-        str=str,
-        tuple=tuple,
-    ):
-
-    def _iterencode_list(lst, _current_indent_level):
+    def _iterencode_list(self, lst, markers, _current_indent_level):
         if not lst:
             yield '[]'
             return
@@ -289,14 +252,15 @@
                 raise ValueError("Circular reference detected")
             markers[markerid] = lst
         buf = '['
-        if _indent is not None:
+        if self.indent is not None:
             _current_indent_level += 1
-            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
-            separator = _item_separator + newline_indent
+            newline_indent = '\n' + (' ' * (self.indent *
+                                            _current_indent_level))
+            separator = self.item_separator + newline_indent
             buf += newline_indent
         else:
             newline_indent = None
-            separator = _item_separator
+            separator = self.item_separator
         first = True
         for value in lst:
             if first:
@@ -304,7 +268,7 @@
             else:
                 buf = separator
             if isinstance(value, basestring):
-                yield buf + _encoder(value)
+                yield buf + self.encoder(value)
             elif value is None:
                 yield buf + 'null'
             elif value is True:
@@ -314,25 +278,28 @@
             elif isinstance(value, (int, long)):
                 yield buf + str(value)
             elif isinstance(value, float):
-                yield buf + _floatstr(value)
+                yield buf + self._floatstr(value)
             else:
                 yield buf
                 if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
+                    chunks = self._iterencode_list(value, markers,
+                                                   _current_indent_level)
                 elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+                    chunks = self._iterencode_dict(value, markers,
+                                                   _current_indent_level)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
+                    chunks = self._iterencode(value, markers,
+                                              _current_indent_level)
                 for chunk in chunks:
                     yield chunk
         if newline_indent is not None:
             _current_indent_level -= 1
-            yield '\n' + (' ' * (_indent * _current_indent_level))
+            yield '\n' + (' ' * (self.indent * _current_indent_level))
         yield ']'
         if markers is not None:
             del markers[markerid]
 
-    def _iterencode_dict(dct, _current_indent_level):
+    def _iterencode_dict(self, dct, markers, _current_indent_level):
         if not dct:
             yield '{}'
             return
@@ -342,16 +309,17 @@
                 raise ValueError("Circular reference detected")
             markers[markerid] = dct
         yield '{'
-        if _indent is not None:
+        if self.indent is not None:
             _current_indent_level += 1
-            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
-            item_separator = _item_separator + newline_indent
+            newline_indent = '\n' + (' ' * (self.indent *
+                                            _current_indent_level))
+            item_separator = self.item_separator + newline_indent
             yield newline_indent
         else:
             newline_indent = None
-            item_separator = _item_separator
+            item_separator = self.item_separator
         first = True
-        if _sort_keys:
+        if self.sort_keys:
             items = sorted(dct.items(), key=lambda kv: kv[0])
         else:
             items = dct.iteritems()
@@ -361,7 +329,7 @@
             # JavaScript is weakly typed for these, so it makes sense to
             # also allow them.  Many encoders seem to do something like this.
             elif isinstance(key, float):
-                key = _floatstr(key)
+                key = self._floatstr(key)
             elif key is True:
                 key = 'true'
             elif key is False:
@@ -370,7 +338,7 @@
                 key = 'null'
             elif isinstance(key, (int, long)):
                 key = str(key)
-            elif _skipkeys:
+            elif self.skipkeys:
                 continue
             else:
                 raise TypeError("key " + repr(key) + " is not a string")
@@ -378,10 +346,10 @@
                 first = False
             else:
                 yield item_separator
-            yield _encoder(key)
-            yield _key_separator
+            yield self.encoder(key)
+            yield self.key_separator
             if isinstance(value, basestring):
-                yield _encoder(value)
+                yield self.encoder(value)
             elif value is None:
                 yield 'null'
             elif value is True:
@@ -391,26 +359,29 @@
             elif isinstance(value, (int, long)):
                 yield str(value)
             elif isinstance(value, float):
-                yield _floatstr(value)
+                yield self._floatstr(value)
             else:
                 if isinstance(value, (list, tuple)):
-                    chunks = _iterencode_list(value, _current_indent_level)
+                    chunks = self._iterencode_list(value, markers,
+                                                   _current_indent_level)
                 elif isinstance(value, dict):
-                    chunks = _iterencode_dict(value, _current_indent_level)
+                    chunks = self._iterencode_dict(value, markers,
+                                                   _current_indent_level)
                 else:
-                    chunks = _iterencode(value, _current_indent_level)
+                    chunks = self._iterencode(value, markers,
+                                              _current_indent_level)
                 for chunk in chunks:
                     yield chunk
         if newline_indent is not None:
             _current_indent_level -= 1
-            yield '\n' + (' ' * (_indent * _current_indent_level))
+            yield '\n' + (' ' * (self.indent * _current_indent_level))
         yield '}'
         if markers is not None:
             del markers[markerid]
 
-    def _iterencode(o, _current_indent_level):
+    def _iterencode(self, o, markers, _current_indent_level):
         if isinstance(o, basestring):
-            yield _encoder(o)
+            yield self.encoder(o)
         elif o is None:
             yield 'null'
         elif o is True:
@@ -420,12 +391,14 @@
         elif isinstance(o, (int, long)):
             yield str(o)
         elif isinstance(o, float):
-            yield _floatstr(o)
+            yield self._floatstr(o)
         elif isinstance(o, (list, tuple)):
-            for chunk in _iterencode_list(o, _current_indent_level):
+            for chunk in self._iterencode_list(o, markers,
+                                               _current_indent_level):
                 yield chunk
         elif isinstance(o, dict):
-            for chunk in _iterencode_dict(o, _current_indent_level):
+            for chunk in self._iterencode_dict(o, markers,
+                                               _current_indent_level):
                 yield chunk
         else:
             if markers is not None:
@@ -433,10 +406,10 @@
                 if markerid in markers:
                     raise ValueError("Circular reference detected")
                 markers[markerid] = o
-            o = _default(o)
-            for chunk in _iterencode(o, _current_indent_level):
+            o = self.default(o)
+            for chunk in self._iterencode(o, markers,
+                                          _current_indent_level):
                 yield chunk
             if markers is not None:
                 del markers[markerid]
 
-    return _iterencode