[pypy-commit] pypy improve-str2charp: hg merge default
antocuni
noreply at buildbot.pypy.org
Mon Jul 8 17:08:37 CEST 2013
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: improve-str2charp
Changeset: r65269:4d172e0da296
Date: 2013-07-08 17:07 +0200
http://bitbucket.org/pypy/pypy/changeset/4d172e0da296/
Log: hg merge default
diff --git a/lib-python/2.7/json/__init__.py b/lib-python/2.7/json/__init__.py
--- a/lib-python/2.7/json/__init__.py
+++ b/lib-python/2.7/json/__init__.py
@@ -105,6 +105,12 @@
__author__ = 'Bob Ippolito <bob at redivi.com>'
+try:
+ # PyPy speedup, the interface is different from CPython's _json
+ import _pypyjson
+except ImportError:
+ _pypyjson = None
+
from .decoder import JSONDecoder
from .encoder import JSONEncoder
@@ -241,7 +247,6 @@
_default_decoder = JSONDecoder(encoding=None, object_hook=None,
object_pairs_hook=None)
-
def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
"""Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
@@ -323,7 +328,10 @@
if (cls is None and encoding is None and object_hook is None and
parse_int is None and parse_float is None and
parse_constant is None and object_pairs_hook is None and not kw):
- return _default_decoder.decode(s)
+ if _pypyjson and not isinstance(s, unicode):
+ return _pypyjson.loads(s)
+ else:
+ return _default_decoder.decode(s)
if cls is None:
cls = JSONDecoder
if object_hook is not None:
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -35,7 +35,7 @@
"thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
"binascii", "_multiprocessing", '_warnings',
"_collections", "_multibytecodec", "micronumpy", "_ffi",
- "_continuation", "_cffi_backend", "_csv", "cppyy"]
+ "_continuation", "_cffi_backend", "_csv", "cppyy", "_pypyjson"]
))
translation_modules = default_modules.copy()
diff --git a/pypy/doc/config/objspace.usemodules._pypyjson.txt b/pypy/doc/config/objspace.usemodules._pypyjson.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._pypyjson.txt
@@ -0,0 +1,1 @@
+RPython speedups for the stdlib json module
diff --git a/pypy/module/_pypyjson/__init__.py b/pypy/module/_pypyjson/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/__init__.py
@@ -0,0 +1,10 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+class Module(MixedModule):
+ """fast json implementation"""
+
+ appleveldefs = {}
+
+ interpleveldefs = {
+ 'loads' : 'interp_decoder.loads',
+ }
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -0,0 +1,404 @@
+import sys
+import math
+from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib import rfloat
+from rpython.rtyper.lltypesystem import lltype, rffi
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter import unicodehelper
+from rpython.rtyper.annlowlevel import llstr, hlunicode
+
+OVF_DIGITS = len(str(sys.maxint))
+
+def is_whitespace(ch):
+ return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n'
+
+# precomputing negative powers of 10 is MUCH faster than using e.g. math.pow
+# at runtime
+NEG_POW_10 = [10.0**-i for i in range(16)]
+def neg_pow_10(x, exp):
+ if exp >= len(NEG_POW_10):
+ return 0.0
+ return x * NEG_POW_10[exp]
+
+def strslice2unicode_latin1(s, start, end):
+ """
+ Convert s[start:end] to unicode. s is supposed to be an RPython string
+ encoded in latin-1, which means that the numeric value of each char is the
+ same as the corresponding unicode code point.
+
+ Internally it's implemented at the level of low-level helpers, to avoid
+ the extra copy we would need if we take the actual slice first.
+
+ No bound checking is done, use carefully.
+ """
+ from rpython.rtyper.annlowlevel import llstr, hlunicode
+ from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
+ from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
+ length = end-start
+ ll_s = llstr(s)
+ ll_res = malloc(UNICODE, length)
+ ll_res.hash = 0
+ for i in range(length):
+ ch = ll_s.chars[start+i]
+ ll_res.chars[i] = cast_primitive(UniChar, ch)
+ return hlunicode(ll_res)
+
+TYPE_UNKNOWN = 0
+TYPE_STRING = 1
+class JSONDecoder(object):
+ def __init__(self, space, s):
+ self.space = space
+ self.s = s
+ # we put our string in a raw buffer so:
+ # 1) we automatically get the '\0' sentinel at the end of the string,
+ # which means that we never have to check for the "end of string"
+ # 2) we can pass the buffer directly to strtod
+ self.ll_chars = rffi.str2charp(s)
+ self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+ self.pos = 0
+ self.last_type = TYPE_UNKNOWN
+
+ def close(self):
+ rffi.free_charp(self.ll_chars)
+ lltype.free(self.end_ptr, flavor='raw')
+
+ def getslice(self, start, end):
+ assert start >= 0
+ assert end >= 0
+ return self.s[start:end]
+
+ def skip_whitespace(self, i):
+ while True:
+ ch = self.ll_chars[i]
+ if is_whitespace(ch):
+ i+=1
+ else:
+ break
+ return i
+
+ @specialize.arg(1)
+ def _raise(self, msg, *args):
+ raise operationerrfmt(self.space.w_ValueError, msg, *args)
+
+ def decode_any(self, i):
+ i = self.skip_whitespace(i)
+ ch = self.ll_chars[i]
+ if ch == '"':
+ return self.decode_string(i+1)
+ elif ch == '[':
+ return self.decode_array(i+1)
+ elif ch == '{':
+ return self.decode_object(i+1)
+ elif ch == 'n':
+ return self.decode_null(i+1)
+ elif ch == 't':
+ return self.decode_true(i+1)
+ elif ch == 'f':
+ return self.decode_false(i+1)
+ elif ch == 'I':
+ return self.decode_infinity(i+1)
+ elif ch == 'N':
+ return self.decode_nan(i+1)
+ elif ch == '-':
+ if self.ll_chars[i+1] == 'I':
+ return self.decode_infinity(i+2, sign=-1)
+ return self.decode_numeric(i)
+ elif ch.isdigit():
+ return self.decode_numeric(i)
+ else:
+ self._raise("No JSON object could be decoded: unexpected '%s' at char %d",
+ ch, self.pos)
+
+ def decode_null(self, i):
+ if (self.ll_chars[i] == 'u' and
+ self.ll_chars[i+1] == 'l' and
+ self.ll_chars[i+2] == 'l'):
+ self.pos = i+3
+ return self.space.w_None
+ self._raise("Error when decoding null at char %d", i)
+
+ def decode_true(self, i):
+ if (self.ll_chars[i] == 'r' and
+ self.ll_chars[i+1] == 'u' and
+ self.ll_chars[i+2] == 'e'):
+ self.pos = i+3
+ return self.space.w_True
+ self._raise("Error when decoding true at char %d", i)
+
+ def decode_false(self, i):
+ if (self.ll_chars[i] == 'a' and
+ self.ll_chars[i+1] == 'l' and
+ self.ll_chars[i+2] == 's' and
+ self.ll_chars[i+3] == 'e'):
+ self.pos = i+4
+ return self.space.w_False
+ self._raise("Error when decoding false at char %d", i)
+
+ def decode_infinity(self, i, sign=1):
+ if (self.ll_chars[i] == 'n' and
+ self.ll_chars[i+1] == 'f' and
+ self.ll_chars[i+2] == 'i' and
+ self.ll_chars[i+3] == 'n' and
+ self.ll_chars[i+4] == 'i' and
+ self.ll_chars[i+5] == 't' and
+ self.ll_chars[i+6] == 'y'):
+ self.pos = i+7
+ return self.space.wrap(rfloat.INFINITY * sign)
+ self._raise("Error when decoding Infinity at char %d", i)
+
+ def decode_nan(self, i):
+ if (self.ll_chars[i] == 'a' and
+ self.ll_chars[i+1] == 'N'):
+ self.pos = i+2
+ return self.space.wrap(rfloat.NAN)
+ self._raise("Error when decoding NaN at char %d", i)
+
+ def decode_numeric(self, i):
+ start = i
+ i, ovf_maybe, intval = self.parse_integer(i)
+ #
+ # check for the optional fractional part
+ ch = self.ll_chars[i]
+ if ch == '.':
+ if not self.ll_chars[i+1].isdigit():
+ self._raise("Expected digit at char %d", i+1)
+ return self.decode_float(start)
+ elif ch == 'e' or ch == 'E':
+ return self.decode_float(start)
+ elif ovf_maybe:
+ return self.decode_int_slow(start)
+
+ self.pos = i
+ return self.space.wrap(intval)
+
+ def decode_float(self, i):
+ from rpython.rlib import rdtoa
+ start = rffi.ptradd(self.ll_chars, i)
+ floatval = rdtoa.dg_strtod(start, self.end_ptr)
+ diff = rffi.cast(rffi.LONG, self.end_ptr[0]) - rffi.cast(rffi.LONG, start)
+ self.pos = i + diff
+ return self.space.wrap(floatval)
+
+ def decode_int_slow(self, i):
+ start = i
+ if self.ll_chars[i] == '-':
+ i += 1
+ while self.ll_chars[i].isdigit():
+ i += 1
+ s = self.getslice(start, i)
+ self.pos = i
+ return self.space.call_function(self.space.w_int, self.space.wrap(s))
+
+ def parse_integer(self, i):
+ "Parse a decimal number with an optional minus sign"
+ sign = 1
+ # parse the sign
+ if self.ll_chars[i] == '-':
+ sign = -1
+ i += 1
+ elif self.ll_chars[i] == '+':
+ i += 1
+ #
+ if self.ll_chars[i] == '0':
+ i += 1
+ return i, False, 0
+
+ intval = 0
+ start = i
+ while True:
+ ch = self.ll_chars[i]
+ if ch.isdigit():
+ intval = intval*10 + ord(ch)-ord('0')
+ i += 1
+ else:
+ break
+ count = i - start
+ if count == 0:
+ self._raise("Expected digit at char %d", i)
+ # if the number has more digits than OVF_DIGITS, it might have
+ # overflowed
+ ovf_maybe = (count >= OVF_DIGITS)
+ return i, ovf_maybe, sign * intval
+ parse_integer._always_inline_ = True
+
+ def decode_array(self, i):
+ w_list = self.space.newlist([])
+ start = i
+ count = 0
+ i = self.skip_whitespace(start)
+ if self.ll_chars[i] == ']':
+ self.pos = i+1
+ return w_list
+ #
+ while True:
+ w_item = self.decode_any(i)
+ i = self.pos
+ self.space.call_method(w_list, 'append', w_item)
+ i = self.skip_whitespace(i)
+ ch = self.ll_chars[i]
+ i += 1
+ if ch == ']':
+ self.pos = i
+ return w_list
+ elif ch == ',':
+ pass
+ elif ch == '\0':
+ self._raise("Unterminated array starting at char %d", start)
+ else:
+ self._raise("Unexpected '%s' when decoding array (char %d)",
+ ch, self.pos)
+
+ def decode_object(self, i):
+ start = i
+ w_dict = self.space.newdict()
+ #
+ i = self.skip_whitespace(i)
+ if self.ll_chars[i] == '}':
+ self.pos = i+1
+ return w_dict
+ #
+ while True:
+ # parse a key: value
+ self.last_type = TYPE_UNKNOWN
+ w_name = self.decode_any(i)
+ if self.last_type != TYPE_STRING:
+ self._raise("Key name must be string for object starting at char %d", start)
+ i = self.skip_whitespace(self.pos)
+ ch = self.ll_chars[i]
+ if ch != ':':
+ self._raise("No ':' found at char %d", i)
+ i += 1
+ i = self.skip_whitespace(i)
+ #
+ w_value = self.decode_any(i)
+ self.space.setitem(w_dict, w_name, w_value)
+ i = self.skip_whitespace(self.pos)
+ ch = self.ll_chars[i]
+ i += 1
+ if ch == '}':
+ self.pos = i
+ return w_dict
+ elif ch == ',':
+ pass
+ elif ch == '\0':
+ self._raise("Unterminated object starting at char %d", start)
+ else:
+ self._raise("Unexpected '%s' when decoding object (char %d)",
+ ch, self.pos)
+
+
+ def decode_string(self, i):
+ start = i
+ bits = 0
+ while True:
+ # this loop is a fast path for strings which do not contain escape
+ # characters
+ ch = self.ll_chars[i]
+ i += 1
+ bits |= ord(ch)
+ if ch == '"':
+ if bits & 0x80:
+ # the 8th bit is set, it's a utf8 string
+ content_utf8 = self.getslice(start, i-1)
+ content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
+ else:
+ # ascii only, fast path (ascii is a strict subset of
+ # latin1, and we already checked that all the chars are <
+ # 128)
+ content_unicode = strslice2unicode_latin1(self.s, start, i-1)
+ self.last_type = TYPE_STRING
+ self.pos = i
+ return self.space.wrap(content_unicode)
+ elif ch == '\\':
+ content_so_far = self.getslice(start, i-1)
+ self.pos = i-1
+ return self.decode_string_escaped(start, content_so_far)
+ elif ch == '\0':
+ self._raise("Unterminated string starting at char %d", start)
+
+
+ def decode_string_escaped(self, start, content_so_far):
+ builder = StringBuilder(len(content_so_far)*2) # just an estimate
+ builder.append(content_so_far)
+ i = self.pos
+ while True:
+ ch = self.ll_chars[i]
+ i += 1
+ if ch == '"':
+ content_utf8 = builder.build()
+ content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
+ self.last_type = TYPE_STRING
+ self.pos = i
+ return self.space.wrap(content_unicode)
+ elif ch == '\\':
+ i = self.decode_escape_sequence(i, builder)
+ elif ch == '\0':
+ self._raise("Unterminated string starting at char %d", start)
+ else:
+ builder.append_multiple_char(ch, 1) # we should implement append_char
+
+ def decode_escape_sequence(self, i, builder):
+ ch = self.ll_chars[i]
+ i += 1
+ put = builder.append_multiple_char
+ if ch == '\\': put('\\', 1)
+ elif ch == '"': put('"' , 1)
+ elif ch == '/': put('/' , 1)
+ elif ch == 'b': put('\b', 1)
+ elif ch == 'f': put('\f', 1)
+ elif ch == 'n': put('\n', 1)
+ elif ch == 'r': put('\r', 1)
+ elif ch == 't': put('\t', 1)
+ elif ch == 'u':
+ return self.decode_escape_sequence_unicode(i, builder)
+ else:
+ self._raise("Invalid \\escape: %s (char %d)", ch, self.pos-1)
+ return i
+
+ def decode_escape_sequence_unicode(self, i, builder):
+ # at this point we are just after the 'u' of the \u1234 sequence.
+ start = i
+ i += 4
+ hexdigits = self.getslice(start, i)
+ try:
+ val = int(hexdigits, 16)
+ if val & 0xfc00 == 0xd800:
+ # surrogate pair
+ val = self.decode_surrogate_pair(i, val)
+ i += 6
+ except ValueError:
+ self._raise("Invalid \uXXXX escape (char %d)", i-1)
+ return # help the annotator to know that we'll never go beyond
+ # this point
+ #
+ uchr = unichr(val)
+ utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
+ builder.append(utf8_ch)
+ return i
+
+ def decode_surrogate_pair(self, i, highsurr):
+ if self.ll_chars[i] != '\\' or self.ll_chars[i+1] != 'u':
+ self._raise("Unpaired high surrogate at char %d", i)
+ i += 2
+ hexdigits = self.getslice(i, i+4)
+ lowsurr = int(hexdigits, 16) # the possible ValueError is caught by the caller
+ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
+
+def loads(space, w_s):
+ if space.isinstance_w(w_s, space.w_unicode):
+ raise OperationError(space.w_TypeError,
+ space.wrap("Expected utf8-encoded str, got unicode"))
+ s = space.str_w(w_s)
+ decoder = JSONDecoder(space, s)
+ try:
+ w_res = decoder.decode_any(0)
+ i = decoder.skip_whitespace(decoder.pos)
+ if i < len(s):
+ start = i
+ end = len(s) - 1
+ raise operationerrfmt(space.w_ValueError, "Extra data: char %d - %d", start, end)
+ return w_res
+ finally:
+ decoder.close()
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -0,0 +1,143 @@
+import sys
+import py
+ROOT = py.path.local(__file__).dirpath('..', '..', '..')
+sys.path.insert(0, str(ROOT))
+
+import time
+from rpython.rlib.streamio import open_file_as_stream
+from pypy.interpreter.error import OperationError
+from pypy.module._pypyjson.interp_decoder import loads
+
+
+
+## MSG = open('msg.json').read()
+
+class W_Root(object):
+ pass
+
+class W_Dict(W_Root):
+ def __init__(self):
+ self.dictval = {}
+
+class W_Unicode(W_Root):
+ def __init__(self, x):
+ self.unival = x
+
+class W_String(W_Root):
+ def __init__(self, x):
+ self.strval = x
+
+class W_Int(W_Root):
+ def __init__(self, x):
+ self.intval = x
+
+class W_Float(W_Root):
+ def __init__(self, x):
+ self.floatval = x
+
+class W_List(W_Root):
+ def __init__(self):
+ self.listval = []
+
+class W_Singleton(W_Root):
+ def __init__(self, name):
+ self.name = name
+
+class FakeSpace(object):
+
+ w_None = W_Singleton('None')
+ w_True = W_Singleton('True')
+ w_False = W_Singleton('False')
+ w_ValueError = W_Singleton('ValueError')
+ w_UnicodeDecodeError = W_Singleton('UnicodeDecodeError')
+ w_unicode = W_Unicode
+ w_int = W_Int
+ w_float = W_Float
+
+ def newtuple(self, items):
+ return None
+
+ def newdict(self):
+ return W_Dict()
+
+ def newlist(self, items):
+ return W_List()
+
+ def isinstance_w(self, w_x, w_type):
+ return isinstance(w_x, w_type)
+
+ def str_w(self, w_x):
+ assert isinstance(w_x, W_String)
+ return w_x.strval
+
+ def call_method(self, obj, name, arg):
+ assert name == 'append'
+ assert isinstance(obj, W_List)
+ obj.listval.append(arg)
+ call_method._dont_inline_ = True
+
+ def call_function(self, w_func, *args_w):
+ return self.w_None # XXX
+
+ def setitem(self, d, key, value):
+ assert isinstance(d, W_Dict)
+ assert isinstance(key, W_Unicode)
+ d.dictval[key.unival] = value
+
+ def wrapunicode(self, x):
+ return W_Unicode(x)
+
+ def wrapint(self, x):
+ return W_Int(x)
+
+ def wrapfloat(self, x):
+ return W_Float(x)
+
+ def wrap(self, x):
+ if isinstance(x, int):
+ return W_Int(x)
+ elif isinstance(x, float):
+ return W_Float(x)
+ ## elif isinstance(x, str):
+ ## assert False
+ else:
+ return W_Unicode(unicode(x))
+ wrap._annspecialcase_ = "specialize:argtype(1)"
+
+
+fakespace = FakeSpace()
+
+def myloads(msg):
+ return loads(fakespace, W_String(msg))
+
+
+def bench(title, N, fn, arg):
+ a = time.clock()
+ for i in range(N):
+ res = fn(arg)
+ b = time.clock()
+ print title, (b-a) / N * 1000
+
+def entry_point(argv):
+ if len(argv) != 3:
+ print 'Usage: %s FILE n' % argv[0]
+ return 1
+ filename = argv[1]
+ N = int(argv[2])
+ f = open_file_as_stream(filename)
+ msg = f.readall()
+
+ try:
+ bench('loads ', N, myloads, msg)
+ except OperationError, e:
+ print 'Error', e._compute_value(fakespace)
+
+ return 0
+
+# _____ Define and setup target ___
+
+def target(*args):
+ return entry_point, None
+
+if __name__ == '__main__':
+ entry_point(sys.argv)
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -0,0 +1,188 @@
+# -*- encoding: utf-8 -*-
+import py
+from pypy.module._pypyjson.interp_decoder import JSONDecoder
+
+def test_skip_whitespace():
+ s = ' hello '
+ dec = JSONDecoder('fake space', s)
+ assert dec.pos == 0
+ assert dec.skip_whitespace(0) == 3
+ assert dec.skip_whitespace(3) == 3
+ assert dec.skip_whitespace(8) == len(s)
+ dec.close()
+
+
+
+class AppTest(object):
+ spaceconfig = {"objspace.usemodules._pypyjson": True}
+
+ def test_raise_on_unicode(self):
+ import _pypyjson
+ raises(TypeError, _pypyjson.loads, u"42")
+
+
+ def test_decode_constants(self):
+ import _pypyjson
+ assert _pypyjson.loads('null') is None
+ raises(ValueError, _pypyjson.loads, 'nul')
+ raises(ValueError, _pypyjson.loads, 'nu')
+ raises(ValueError, _pypyjson.loads, 'n')
+ raises(ValueError, _pypyjson.loads, 'nuXX')
+ #
+ assert _pypyjson.loads('true') is True
+ raises(ValueError, _pypyjson.loads, 'tru')
+ raises(ValueError, _pypyjson.loads, 'tr')
+ raises(ValueError, _pypyjson.loads, 't')
+ raises(ValueError, _pypyjson.loads, 'trXX')
+ #
+ assert _pypyjson.loads('false') is False
+ raises(ValueError, _pypyjson.loads, 'fals')
+ raises(ValueError, _pypyjson.loads, 'fal')
+ raises(ValueError, _pypyjson.loads, 'fa')
+ raises(ValueError, _pypyjson.loads, 'f')
+ raises(ValueError, _pypyjson.loads, 'falXX')
+
+
+ def test_decode_string(self):
+ import _pypyjson
+ res = _pypyjson.loads('"hello"')
+ assert res == u'hello'
+ assert type(res) is unicode
+
+ def test_decode_string_utf8(self):
+ import _pypyjson
+ s = u'àèìòù'
+ res = _pypyjson.loads('"%s"' % s.encode('utf-8'))
+ assert res == s
+
+ def test_skip_whitespace(self):
+ import _pypyjson
+ s = ' "hello" '
+ assert _pypyjson.loads(s) == u'hello'
+ s = ' "hello" extra'
+ raises(ValueError, "_pypyjson.loads(s)")
+
+ def test_unterminated_string(self):
+ import _pypyjson
+ s = '"hello' # missing the trailing "
+ raises(ValueError, "_pypyjson.loads(s)")
+
+ def test_escape_sequence(self):
+ import _pypyjson
+ assert _pypyjson.loads(r'"\\"') == u'\\'
+ assert _pypyjson.loads(r'"\""') == u'"'
+ assert _pypyjson.loads(r'"\/"') == u'/'
+ assert _pypyjson.loads(r'"\b"') == u'\b'
+ assert _pypyjson.loads(r'"\f"') == u'\f'
+ assert _pypyjson.loads(r'"\n"') == u'\n'
+ assert _pypyjson.loads(r'"\r"') == u'\r'
+ assert _pypyjson.loads(r'"\t"') == u'\t'
+
+ def test_escape_sequence_in_the_middle(self):
+ import _pypyjson
+ s = r'"hello\nworld"'
+ assert _pypyjson.loads(s) == "hello\nworld"
+
+ def test_unterminated_string_after_escape_sequence(self):
+ import _pypyjson
+ s = r'"hello\nworld' # missing the trailing "
+ raises(ValueError, "_pypyjson.loads(s)")
+
+ def test_escape_sequence_unicode(self):
+ import _pypyjson
+ s = r'"\u1234"'
+ assert _pypyjson.loads(s) == u'\u1234'
+
+ def test_invalid_utf_8(self):
+ import _pypyjson
+ s = '"\xe0"' # this is an invalid UTF8 sequence inside a string
+ raises(UnicodeDecodeError, "_pypyjson.loads(s)")
+
+ def test_decode_numeric(self):
+ import sys
+ import _pypyjson
+ def check(s, val):
+ res = _pypyjson.loads(s)
+ assert type(res) is type(val)
+ assert res == val
+ #
+ check('42', 42)
+ check('-42', -42)
+ check('42.123', 42.123)
+ check('42E0', 42.0)
+ check('42E3', 42000.0)
+ check('42E-1', 4.2)
+ check('42E+1', 420.0)
+ check('42.123E3', 42123.0)
+ check('0', 0)
+ check('-0', 0)
+ check('0.123', 0.123)
+ check('0E3', 0.0)
+ check('5E0001', 50.0)
+ check(str(1 << 32), 1 << 32)
+ check(str(1 << 64), 1 << 64)
+ #
+ x = str(sys.maxint+1) + '.123'
+ check(x, float(x))
+ x = str(sys.maxint+1) + 'E1'
+ check(x, float(x))
+ x = str(sys.maxint+1) + 'E-1'
+ check(x, float(x))
+ #
+ check('1E400', float('inf'))
+ ## # these are non-standard but supported by CPython json
+ check('Infinity', float('inf'))
+ check('-Infinity', float('-inf'))
+
+ def test_nan(self):
+ import math
+ import _pypyjson
+ res = _pypyjson.loads('NaN')
+ assert math.isnan(res)
+
+ def test_decode_numeric_invalid(self):
+ import _pypyjson
+ def error(s):
+ raises(ValueError, _pypyjson.loads, s)
+ #
+ error(' 42 abc')
+ error('.123')
+ error('+123')
+ error('12.')
+ error('12.-3')
+ error('12E')
+ error('12E-')
+ error('0123') # numbers can't start with 0
+
+ def test_decode_object(self):
+ import _pypyjson
+ assert _pypyjson.loads('{}') == {}
+ assert _pypyjson.loads('{ }') == {}
+ #
+ s = '{"hello": "world", "aaa": "bbb"}'
+ assert _pypyjson.loads(s) == {'hello': 'world',
+ 'aaa': 'bbb'}
+ raises(ValueError, _pypyjson.loads, '{"key"')
+ raises(ValueError, _pypyjson.loads, '{"key": 42')
+
+ def test_decode_object_nonstring_key(self):
+ import _pypyjson
+ raises(ValueError, "_pypyjson.loads('{42: 43}')")
+
+ def test_decode_array(self):
+ import _pypyjson
+ assert _pypyjson.loads('[]') == []
+ assert _pypyjson.loads('[ ]') == []
+ assert _pypyjson.loads('[1]') == [1]
+ assert _pypyjson.loads('[1, 2]') == [1, 2]
+ raises(ValueError, "_pypyjson.loads('[1: 2]')")
+ raises(ValueError, "_pypyjson.loads('[1, 2')")
+ raises(ValueError, """_pypyjson.loads('["extra comma",]')""")
+
+ def test_unicode_surrogate_pair(self):
+ import _pypyjson
+ expected = u'z\U0001d120x'
+ res = _pypyjson.loads('"z\\ud834\\udd20x"')
+ assert res == expected
+
+
diff --git a/pypy/tool/gdb_pypy.py b/pypy/tool/gdb_pypy.py
--- a/pypy/tool/gdb_pypy.py
+++ b/pypy/tool/gdb_pypy.py
@@ -76,18 +76,22 @@
def invoke(self, arg, from_tty):
# some magic code to automatically reload the python file while developing
- ## from pypy.tool import gdb_pypy
- ## reload(gdb_pypy)
- ## gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
- ## self.__class__ = gdb_pypy.RPyType
+ from pypy.tool import gdb_pypy
+ reload(gdb_pypy)
+ gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
+ self.__class__ = gdb_pypy.RPyType
print self.do_invoke(arg, from_tty)
def do_invoke(self, arg, from_tty):
- obj = self.gdb.parse_and_eval(arg)
- hdr = lookup(obj, '_gcheader')
- tid = hdr['h_tid']
- offset = tid & 0xFFFFFFFF # 64bit only
- offset = int(offset) # convert from gdb.Value to python int
+ try:
+ offset = int(arg)
+ except ValueError:
+ obj = self.gdb.parse_and_eval(arg)
+ hdr = lookup(obj, '_gcheader')
+ tid = hdr['h_tid']
+ offset = tid & 0xFFFFFFFF # 64bit only
+ offset = int(offset) # convert from gdb.Value to python int
+
typeids = self.get_typeids()
if offset in typeids:
return typeids[offset]
More information about the pypy-commit
mailing list