[pypy-commit] pypy fastjson: put a \0 sentinel at the end of the string: the cost of the string copy is neligible but this let us to avoid lots of eof() checks during the parsing. Also, relax the dependency on self.pos, and explicitly pass the current index around

Sun Jun 9 11:41:57 CEST 2013

Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: fastjson
Changeset: r64834:f642cad5d507
Date: 2013-06-07 01:13 +0200
http://bitbucket.org/pypy/pypy/changeset/f642cad5d507/

Log:	put a \0 sentinel at the end of the string: the cost of the string
	copy is neligible but this let us to avoid lots of eof() checks
	during the parsing. Also, relax the dependency on self.pos, and
	explicitly pass the current index around

diff --git a/pypy/module/_fastjson/interp_decoder.py b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -4,6 +4,7 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter import unicodehelper
+from rpython.rtyper.annlowlevel import llstr, hlunicode
 
 def is_whitespace(ch):
     return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n'
@@ -88,62 +89,54 @@
     def _raise(self, msg, *args):
         raise operationerrfmt(self.space.w_ValueError, msg, *args)
 
-    def decode_any(self):
-        self.pos = self.skip_whitespace(self.pos)
-        ch = self.peek()
+    def decode_any(self, i):
+        i = self.skip_whitespace(i)
+        ch = self.s[i]
         if ch == '"':
-            self.next()
-            return self.decode_string()
+            return self.decode_string(i+1)
         elif ch.isdigit() or ch == '-':
-            return self.decode_numeric()
+            return self.decode_numeric(i)
         elif ch == '[':
-            self.next()
-            return self.decode_array()
+            return self.decode_array(i+1)
         elif ch == '{':
-            self.next()
-            return self.decode_object()
+            return self.decode_object(i+1)
         elif ch == 'n':
-            self.next()
-            return self.decode_null()
+            return self.decode_null(i+1)
         elif ch == 't':
-            self.next()
-            return self.decode_true()
+            return self.decode_true(i+1)
         elif ch == 'f':
-            self.next()
-            return self.decode_false()
+            return self.decode_false(i+1)
         else:
             self._raise("No JSON object could be decoded: unexpected '%s' at char %d",
                         ch, self.pos)
 
-    def decode_null(self):
-        N = len('ull')
-        if (self.pos+N <= len(self.s) and
-            self.next() == 'u' and
-            self.next() == 'l' and
-            self.next() == 'l'):
+    def decode_null(self, i):
+        if (self.s[i]   == 'u' and
+            self.s[i+1] == 'l' and
+            self.s[i+2] == 'l'):
+            self.pos = i+3
             return self.space.w_None
-        self._raise("Error when decoding null at char %d", self.pos)
+        self._raise("Error when decoding null at char %d", i)
 
-    def decode_true(self):
-        N = len('rue')
-        if (self.pos+N <= len(self.s) and
-            self.next() == 'r' and
-            self.next() == 'u' and
-            self.next() == 'e'):
+    def decode_true(self, i):
+        if (self.s[i]   == 'r' and
+            self.s[i+1] == 'u' and
+            self.s[i+2] == 'e'):
+            self.pos = i+3
             return self.space.w_True
-        self._raise("Error when decoding true at char %d", self.pos)
+        self._raise("Error when decoding true at char %d", i)
 
-    def decode_false(self):
-        N = len('alse')
-        if (self.pos+N <= len(self.s) and
-            self.next() == 'a' and
-            self.next() == 'l' and
-            self.next() == 's' and
-            self.next() == 'e'):
+    def decode_false(self, i):
+        if (self.s[i]   == 'a' and
+            self.s[i+1] == 'l' and
+            self.s[i+2] == 's' and
+            self.s[i+3] == 'e'):
+            self.pos = i+4
             return self.space.w_False
-        self._raise("Error when decoding false at char %d", self.pos)
+        self._raise("Error when decoding false at char %d", i)
 
-    def decode_numeric(self):
+    def decode_numeric(self, i):
+        self.pos = i
         intval = self.parse_integer()
         #
         is_float = False
@@ -201,22 +194,20 @@
         self.pos = i
         return intval, count
         
-    def decode_array(self):
+    def decode_array(self, i):
         w_list = self.space.newlist([])
-        start = self.pos
+        start = i
+        count = 0
         i = self.skip_whitespace(start)
         while i < len(self.s):
             ch = self.s[i]
             if ch == ']':
                 self.pos = i+1
                 return w_list
-            self.pos = i
-            w_item = self.decode_any()
+            w_item = self.decode_any(i)
             i = self.pos
             self.space.call_method(w_list, 'append', w_item)
             i = self.skip_whitespace(i)
-            if i == len(self.s):
-                break
             ch = self.s[i]
             i += 1
             if ch == ']':
@@ -230,35 +221,34 @@
         self._raise("Unterminated array starting at char %d", start)
 
 
-    def decode_object(self):
-        start = self.pos
+    def decode_object(self, i):
+        start = i
         w_dict = self.space.newdict()
-        while not self.eof():
-            ch = self.peek()
+        while i < len(self.s):
+            ch = self.s[i]
             if ch == '}':
-                self.next()
+                self.pos = i+1
                 return w_dict
             #
             # parse a key: value
             self.last_type = TYPE_UNKNOWN
-            w_name = self.decode_any()
+            w_name = self.decode_any(i)
             if self.last_type != TYPE_STRING:
                 self._raise("Key name must be string for object starting at char %d", start)
-            self.pos = self.skip_whitespace(self.pos)
-            if self.eof():
-                break
-            ch = self.next()
+            i = self.skip_whitespace(self.pos)
+            ch = self.s[i]
             if ch != ':':
-                self._raise("No ':' found at char %d", self.pos)
-            self.pos = self.skip_whitespace(self.pos)
+                self._raise("No ':' found at char %d", i)
+            i += 1
+            i = self.skip_whitespace(i)
             #
-            w_value = self.decode_any()
+            w_value = self.decode_any(i)
             self.space.setitem(w_dict, w_name, w_value)
-            self.pos = self.skip_whitespace(self.pos)
-            if self.eof():
-                break
-            ch = self.next()
+            i = self.skip_whitespace(self.pos)
+            ch = self.s[i]
+            i += 1
             if ch == '}':
+                self.pos = i
                 return w_dict
             elif ch == ',':
                 pass
@@ -267,9 +257,8 @@
                             ch, self.pos)
         self._raise("Unterminated object starting at char %d", start)
 
-    def decode_string(self):
-        start = self.pos
-        i = self.pos
+    def decode_string(self, i):
+        start = i
         bits = 0
         while i < len(self.s):
             # this loop is a fast path for strings which do not contain escape
@@ -348,11 +337,13 @@
 
 @unwrap_spec(s=str)
 def loads(space, s):
+    # the '\0' serves as a sentinel, so that we can avoid the bound check
+    s = s + '\0'
     decoder = JSONDecoder(space, s)
-    w_res = decoder.decode_any()
-    decoder.pos = decoder.skip_whitespace(decoder.pos)
-    if not decoder.eof():
-        start = decoder.pos
-        end = len(decoder.s)
+    w_res = decoder.decode_any(0)
+    i = decoder.skip_whitespace(decoder.pos)
+    if s[i] != '\0':
+        start = i
+        end = len(s) - 1
         raise operationerrfmt(space.w_ValueError, "Extra data: char %d - %d", start, end)
     return w_res