[pypy-commit] pypy default: Add one specific function used during encoding to _pypyjson.

arigo noreply at buildbot.pypy.org
Fri Aug 29 18:03:57 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73173:e80c25f01061
Date: 2014-08-29 18:03 +0200
http://bitbucket.org/pypy/pypy/changeset/e80c25f01061/

Log:	Add one specific function used during encoding to _pypyjson
	(raw_encode_basestring_ascii). The existing helper in json/encoder.py
	that it replaces is a performance bottleneck in some cases.

diff --git a/lib-python/2.7/json/encoder.py b/lib-python/2.7/json/encoder.py
--- a/lib-python/2.7/json/encoder.py
+++ b/lib-python/2.7/json/encoder.py
@@ -529,3 +529,10 @@
                                           _current_indent_level):
                 yield chunk
             self.__remove_markers(markers, o)
+
+
+# overwrite some helpers here with more efficient versions
+try:
+    from _pypyjson import raw_encode_basestring_ascii
+except ImportError:
+    pass
diff --git a/pypy/module/_pypyjson/__init__.py b/pypy/module/_pypyjson/__init__.py
--- a/pypy/module/_pypyjson/__init__.py
+++ b/pypy/module/_pypyjson/__init__.py
@@ -7,4 +7,6 @@
 
     interpleveldefs = {
         'loads' : 'interp_decoder.loads',
+        'raw_encode_basestring_ascii':
+            'interp_encoder.raw_encode_basestring_ascii',
         }
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -0,0 +1,63 @@
+from rpython.rlib.rstring import StringBuilder
+
+
+HEX = '0123456789abcdef'
+
+ESCAPE_DICT = {
+    '\b': '\\b',
+    '\f': '\\f',
+    '\n': '\\n',
+    '\r': '\\r',
+    '\t': '\\t',
+}
+ESCAPE_BEFORE_SPACE = [ESCAPE_DICT.get(chr(_i), '\\u%04x' % _i)
+                       for _i in range(32)]
+
+
+def raw_encode_basestring_ascii(space, w_string):
+    if space.isinstance_w(w_string, space.w_str):
+        s = space.str_w(w_string)
+        for c in s:
+            if c >= ' ' and c <= '~' and c != '"' and c != '\\':
+                pass
+            else:
+                break
+        else:
+            # the input is a string with only non-special ascii chars
+            return w_string
+
+        w_string = space.call_method(w_string, 'decode', space.wrap('utf-8'))
+
+    u = space.unicode_w(w_string)
+    sb = StringBuilder()
+    for c in u:
+        if c <= u'~':
+            if c == u'"' or c == u'\\':
+                sb.append('\\')
+            elif c < u' ':
+                sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
+                continue
+            sb.append(chr(ord(c)))
+        else:
+            if c <= u'\uffff':
+                sb.append('\\u')
+                sb.append(HEX[ord(c) >> 12])
+                sb.append(HEX[(ord(c) >> 8) & 0x0f])
+                sb.append(HEX[(ord(c) >> 4) & 0x0f])
+                sb.append(HEX[ord(c) & 0x0f])
+            else:
+                # surrogate pair
+                n = ord(c) - 0x10000
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                sb.append('\\ud')
+                sb.append(HEX[(s1 >> 8) & 0x0f])
+                sb.append(HEX[(s1 >> 4) & 0x0f])
+                sb.append(HEX[s1 & 0x0f])
+                s2 = 0xdc00 | (n & 0x3ff)
+                sb.append('\\ud')
+                sb.append(HEX[(s2 >> 8) & 0x0f])
+                sb.append(HEX[(s2 >> 4) & 0x0f])
+                sb.append(HEX[s2 & 0x0f])
+
+    res = sb.build()
+    return space.wrap(res)
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -188,4 +188,23 @@
         import _pypyjson
         # http://json.org/JSON_checker/test/fail25.json
         s = '["\ttab\tcharacter\tin\tstring\t"]'
-        raises(ValueError, "_pypyjson.loads(s)")
\ No newline at end of file
+        raises(ValueError, "_pypyjson.loads(s)")
+
+    def test_raw_encode_basestring_ascii(self):
+        import _pypyjson
+        def check(s):
+            s = _pypyjson.raw_encode_basestring_ascii(s)
+            assert type(s) is str
+            return s
+        assert check("") == ""
+        assert check(u"") == ""
+        assert check("abc ") == "abc "
+        assert check(u"abc ") == "abc "
+        raises(UnicodeDecodeError, check, "\xc0")
+        assert check("\xc2\x84") == "\\u0084"
+        assert check("\xf0\x92\x8d\x85") == "\\ud808\\udf45"
+        assert check(u"\ud808\udf45") == "\\ud808\\udf45"
+        assert check(u"\U00012345") == "\\ud808\\udf45"
+        assert check("a\"c") == "a\\\"c"
+        assert check("\\\"\b\f\n\r\t") == '\\\\\\"\\b\\f\\n\\r\\t'
+        assert check("\x07") == "\\u0007"


More information about the pypy-commit mailing list