[pypy-commit] pypy fastjson: here are dragons: valgrind shows that a good percentage of time was spent in

antocuni noreply at buildbot.pypy.org
Sun Jun 9 11:41:47 CEST 2013


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: fastjson
Changeset: r64828:c9e7d8554e5f
Date: 2013-06-06 15:33 +0200
http://bitbucket.org/pypy/pypy/changeset/c9e7d8554e5f/

Log:	here are dragons: valgrind shows that a good percentage of time was
	spent in taking the slice + converting it to unicode. Instead, we
	directly create a unicode string by copying the relevant characters
	from the original string, but we need to go down to the level of "low
	level helpers" to do that, with llstr & co.

diff --git a/pypy/module/_fastjson/interp_decoder.py b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -19,6 +19,19 @@
 TYPE_UNKNOWN = 0
 TYPE_STRING = 1
 
+def strslice2unicode_ascii(s, start, end):  # return s[start:end] as a unicode string, copied char by char; intended for ASCII-only slices
+    from rpython.rtyper.annlowlevel import llstr, hlunicode  # function-local imports of the low-level string helpers
+    from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
+    from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
+    length = end-start  # number of characters to copy; start/end are not bounds-checked here
+    ll_s = llstr(s)  # presumably the low-level representation of s, giving direct access to .chars -- confirm in annlowlevel
+    ll_res = malloc(UNICODE, length)  # allocate the result with room for `length` characters
+    ll_res.hash = 0  # NOTE(review): presumably 0 means "hash not computed yet" -- confirm against rstr
+    for i in range(length):
+        ch = ll_s.chars[start+i]  # read the source character at absolute offset start+i
+        ll_res.chars[i] = cast_primitive(UniChar, ch)  # cast the single char to UniChar; no decoding step is performed
+    return hlunicode(ll_res)  # presumably converts the low-level result back to a high-level unicode object
+
 class JSONDecoder(object):
     def __init__(self, space, s):
         self.space = space
@@ -252,13 +265,13 @@
             i += 1
             bits |= ord(ch)
             if ch == '"':
-                content_utf8 = self.getslice(start, i-1)
                 if bits & 0x80:
                     # the 8th bit is set, it's an utf8 strnig
+                    content_utf8 = self.getslice(start, i-1)
                     content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
                 else:
-                    # ascii only, faster to decode
-                    content_unicode = content_utf8.decode('ascii')
+                    # ascii only, fast path
+                    content_unicode = strslice2unicode_ascii(self.s, start, i-1)
                 self.last_type = TYPE_STRING
                 self.i = i
                 return self.space.wrap(content_unicode)


More information about the pypy-commit mailing list