[pypy-commit] pypy fastjson: merge heads

Fri Jun 28 16:28:11 CEST 2013

Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: fastjson
Changeset: r65068:54438eb6c42c
Date: 2013-06-28 15:49 +0200
http://bitbucket.org/pypy/pypy/changeset/54438eb6c42c/

Log:	merge heads

diff --git a/pypy/module/_fastjson/interp_decoder.py b/pypy/module/_fastjson/interp_decoder.py
--- a/pypy/module/_fastjson/interp_decoder.py
+++ b/pypy/module/_fastjson/interp_decoder.py
@@ -1,11 +1,15 @@
+import sys
 import math
 from rpython.rlib.rstring import StringBuilder
 from rpython.rlib.objectmodel import specialize
+from rpython.rlib import rfloat
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.interpreter.gateway import unwrap_spec
 from pypy.interpreter import unicodehelper
 from rpython.rtyper.annlowlevel import llstr, hlunicode
 
+OVF_DIGITS = len(str(sys.maxint))
+
 def is_whitespace(ch):
     return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n'
 
@@ -53,8 +57,8 @@
         self.last_type = TYPE_UNKNOWN
 
     def getslice(self, start, end):
-        assert start > 0
-        assert end > 0
+        assert start >= 0
+        assert end >= 0
         return self.s[start:end]
 
     def skip_whitespace(self, i):
@@ -117,7 +121,20 @@
         self._raise("Error when decoding false at char %d", i)
 
     def decode_numeric(self, i):
-        i, intval = self.parse_integer(i)
+        w_res = self.decode_numeric_fast(i)
+        if w_res is self.space.w_None:
+            # possible overflow, reparse it
+            return self.decode_numeric_slow(i)
+        return w_res
+
+    def decode_numeric_fast(self, i):
+        i, ovf_maybe, intval = self.parse_integer(i, allow_leading_0=False)
+        if ovf_maybe:
+            # apparently we get a ~30% slowdown on my microbenchmark if we
+            # return None instead of w_None, probably because the annotation
+            # of the result gets can_be_None=True. We need to check if this
+            # is still true also for the full pypy
+            return self.space.w_None
         #
         is_float = False
         exp = 0
@@ -134,28 +151,72 @@
         # check for the optional exponent part
         if ch == 'E' or ch == 'e':
             is_float = True
-            i, exp = self.parse_integer(i+1)
+            i, ovf_maybe, exp = self.parse_integer(i+1, allow_leading_0=True)
         #
         self.pos = i
         if is_float:
             # build the float
             floatval = intval + frcval
             if exp != 0:
-                floatval = floatval * math.pow(10, exp)
+                try:
+                    floatval = floatval * math.pow(10, exp)
+                except OverflowError:
+                    floatval = rfloat.INFINITY
             return self.space.wrap(floatval)
         else:
             return self.space.wrap(intval)
 
-    def parse_integer(self, i):
+    def decode_numeric_slow(self, i):
+        is_float = False
+        start = i
+        if self.ll_chars[i] == '-':
+            i += 1
+        # skip the integral part
+        while self.ll_chars[i].isdigit():
+            i += 1
+        #
+        # skip the fractional part, if any
+        if self.ll_chars[i] == '.':
+            is_float = True
+            i += 1
+            while self.ll_chars[i].isdigit():
+                i += 1
+        #
+        # skip the exponent part, if any
+        if self.ll_chars[i] == 'e' or self.ll_chars[i] == 'E':
+            is_float = True
+            i += 1
+            while self.ll_chars[i].isdigit():
+                i += 1
+        #
+        self.pos = i
+        s = self.getslice(start, i)
+        if is_float:
+            w_func = self.space.w_float
+        else:
+            w_func = self.space.w_int
+        w_res = self.space.call_function(w_func, self.space.wrap(s))
+        #assert w_res is not None # XXX check if this brings any speedup in pypy-c
+        return w_res
+
+    def parse_integer(self, i, allow_leading_0=False):
         "Parse a decimal number with an optional minus sign"
+        start = i
         sign = 1
         if self.ll_chars[i] == '-':
             sign = -1
             i += 1
         elif self.ll_chars[i] == '+':
             i += 1
+        elif not allow_leading_0 and self.ll_chars[i] == '0':
+            i += 1
+            return i, False, 0
         i, intval, _ = self.parse_digits(i)
-        return i, sign * intval
+        # if the number spans at least OVF_DIGITS characters (sign included),
+        # it might have overflowed
+        ovf_maybe = (i-start >= OVF_DIGITS)
+        return i, ovf_maybe, sign * intval
+    parse_integer._always_inline_ = True
 
     def parse_digits(self, i):
         "Parse a sequence of digits as a decimal number. No sign allowed"
@@ -172,7 +233,7 @@
         if count == 0:
             self._raise("Expected digit at char %d", i)
         return i, intval, count
-        
+
     def decode_array(self, i):
         w_list = self.space.newlist([])
         start = i
diff --git a/pypy/module/_fastjson/test/test__fastjson.py b/pypy/module/_fastjson/test/test__fastjson.py
--- a/pypy/module/_fastjson/test/test__fastjson.py
+++ b/pypy/module/_fastjson/test/test__fastjson.py
@@ -99,6 +99,7 @@
 
 
     def test_decode_numeric(self):
+        import sys
         import _fastjson
         def check(s, val):
             res = _fastjson.loads(s)
@@ -113,6 +114,20 @@
         check('42E-1', 4.2)
         check('42E+1', 420.0)
         check('42.123E3', 42123.0)
+        check('0', 0)
+        check('-0', 0)
+        check('0.123', 0.123)
+        check('0E3', 0.0)
+        check('5E0001', 50.0)
+        check(str(1 << 32), 1 << 32)
+        check(str(1 << 64), 1 << 64)
+        #
+        x = str(sys.maxint+1) + '.123'
+        check(x, float(x))
+        x = str(sys.maxint+1) + 'E1'
+        check(x, float(x))
+        #
+        check('1E400', float('inf'))
 
     def test_decode_numeric_invalid(self):
         import _fastjson
@@ -121,10 +136,12 @@
         #
         error('  42   abc')
         error('.123')
+        error('+123')
         error('12.')
         error('12.-3')
         error('12E')
         error('12E-')
+        error('0123') # numbers can't start with 0
 
     def test_decode_object(self):
         import _fastjson