[pypy-commit] pypy default: Add one specific function used during encoding to _pypyjson.
arigo
noreply at buildbot.pypy.org
Fri Aug 29 18:03:57 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r73173:e80c25f01061
Date: 2014-08-29 18:03 +0200
http://bitbucket.org/pypy/pypy/changeset/e80c25f01061/
Log: Add one specific function used during encoding to _pypyjson. It's a
performance bottleneck in some cases.
diff --git a/lib-python/2.7/json/encoder.py b/lib-python/2.7/json/encoder.py
--- a/lib-python/2.7/json/encoder.py
+++ b/lib-python/2.7/json/encoder.py
@@ -529,3 +529,10 @@
_current_indent_level):
yield chunk
self.__remove_markers(markers, o)
+
+
+# overwrite some helpers here with more efficient versions
+try:
+ from _pypyjson import raw_encode_basestring_ascii
+except ImportError:
+ pass
diff --git a/pypy/module/_pypyjson/__init__.py b/pypy/module/_pypyjson/__init__.py
--- a/pypy/module/_pypyjson/__init__.py
+++ b/pypy/module/_pypyjson/__init__.py
@@ -7,4 +7,6 @@
interpleveldefs = {
'loads' : 'interp_decoder.loads',
+ 'raw_encode_basestring_ascii':
+ 'interp_encoder.raw_encode_basestring_ascii',
}
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -0,0 +1,63 @@
+from rpython.rlib.rstring import StringBuilder
+
+
+# Lowercase hex digits, indexed by nibble value when emitting \uXXXX escapes.
+HEX = '0123456789abcdef'
+
+# Control characters that have a dedicated two-character escape.
+ESCAPE_DICT = {'\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r',
+               '\t': '\\t'}
+
+# Escape text for each of the 32 characters below space, indexed by ordinal.
+ESCAPE_BEFORE_SPACE = []
+for _i in range(32):
+    ESCAPE_BEFORE_SPACE.append(ESCAPE_DICT.get(chr(_i), '\\u%04x' % _i))
+
+
+def raw_encode_basestring_ascii(space, w_string):
+    """Return w_string JSON-escaped as a wrapped, ASCII-only byte string.
+
+    A byte string with nothing to escape is returned as the same object;
+    any other byte string is decoded as UTF-8 before escaping.
+    """
+    if space.isinstance_w(w_string, space.w_str):
+        plain = True
+        for ch in space.str_w(w_string):
+            if ch < ' ' or ch > '~' or ch == '"' or ch == '\\':
+                plain = False
+                break
+        if plain:
+            return w_string
+        w_string = space.call_method(w_string, 'decode', space.wrap('utf-8'))
+
+    text = space.unicode_w(w_string)
+    out = StringBuilder()
+    for uch in text:
+        code = ord(uch)
+        if code < 0x20:
+            out.append(ESCAPE_BEFORE_SPACE[code])
+        elif code <= 0x7e:
+            if uch == u'"' or uch == u'\\':
+                out.append('\\')
+            out.append(chr(code))
+        elif code <= 0xffff:
+            out.append('\\u')
+            out.append(HEX[code >> 12])
+            out.append(HEX[(code >> 8) & 0x0f])
+            out.append(HEX[(code >> 4) & 0x0f])
+            out.append(HEX[code & 0x0f])
+        else:
+            # above the BMP: write a UTF-16 surrogate pair, high half first
+            offset = code - 0x10000
+            high = 0xd800 | ((offset >> 10) & 0x3ff)
+            low = 0xdc00 | (offset & 0x3ff)
+            out.append('\\ud')
+            out.append(HEX[(high >> 8) & 0x0f])
+            out.append(HEX[(high >> 4) & 0x0f])
+            out.append(HEX[high & 0x0f])
+            out.append('\\ud')
+            out.append(HEX[(low >> 8) & 0x0f])
+            out.append(HEX[(low >> 4) & 0x0f])
+            out.append(HEX[low & 0x0f])
+
+    return space.wrap(out.build())
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
--- a/pypy/module/_pypyjson/test/test__pypyjson.py
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -188,4 +188,23 @@
import _pypyjson
# http://json.org/JSON_checker/test/fail25.json
s = '["\ttab\tcharacter\tin\tstring\t"]'
- raises(ValueError, "_pypyjson.loads(s)")
\ No newline at end of file
+ raises(ValueError, "_pypyjson.loads(s)")
+
+ def test_raw_encode_basestring_ascii(self):
+     # app-level check of the escaping helper for str and unicode inputs
+     import _pypyjson
+     def encode(value):
+         result = _pypyjson.raw_encode_basestring_ascii(value)
+         assert type(result) is str
+         return result
+     for value in ("", u""):
+         assert encode(value) == ""
+     for value in ("abc ", u"abc "):
+         assert encode(value) == "abc "
+     raises(UnicodeDecodeError, encode, "\xc0")
+     assert encode("\xc2\x84") == "\\u0084"
+     for value in ("\xf0\x92\x8d\x85", u"\ud808\udf45", u"\U00012345"):
+         assert encode(value) == "\\ud808\\udf45"
+     assert encode("a\"c") == "a\\\"c"
+     assert encode("\\\"\b\f\n\r\t") == '\\\\\\"\\b\\f\\n\\r\\t'
+     assert encode("\x07") == "\\u0007"
More information about the pypy-commit
mailing list