[pypy-commit] pypy py3k: merge default

pjenvey noreply at buildbot.pypy.org
Tue Jul 9 19:23:07 CEST 2013


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: py3k
Changeset: r65297:f23adcde38f1
Date: 2013-07-08 15:50 -0700
http://bitbucket.org/pypy/pypy/changeset/f23adcde38f1/

Log:	merge default

diff --git a/lib-python/2.7/json/__init__.py b/lib-python/2.7/json/__init__.py
--- a/lib-python/2.7/json/__init__.py
+++ b/lib-python/2.7/json/__init__.py
@@ -105,6 +105,12 @@
 
 __author__ = 'Bob Ippolito <bob at redivi.com>'
 
+try:
+    # PyPy speedup, the interface is different than CPython's _json
+    import _pypyjson
+except ImportError:
+    _pypyjson = None
+
 from .decoder import JSONDecoder
 from .encoder import JSONEncoder
 
@@ -241,7 +247,6 @@
 _default_decoder = JSONDecoder(encoding=None, object_hook=None,
                                object_pairs_hook=None)
 
-
 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
@@ -323,7 +328,10 @@
     if (cls is None and encoding is None and object_hook is None and
             parse_int is None and parse_float is None and
             parse_constant is None and object_pairs_hook is None and not kw):
-        return _default_decoder.decode(s)
+        if _pypyjson and not isinstance(s, unicode):
+            return _pypyjson.loads(s)
+        else:
+            return _default_decoder.decode(s)
     if cls is None:
         cls = JSONDecoder
     if object_hook is not None:
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -36,7 +36,7 @@
      "binascii", "_multiprocessing", '_warnings',
      "_collections", "_multibytecodec", "_ffi",
      "_continuation", "_csv", "_cffi_backend",
-     "_posixsubprocess",  # "cppyy", "micronumpy",
+     "_posixsubprocess", "_pypyjson", # "cppyy", "micronumpy",
      ]
 ))
 
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -907,7 +907,7 @@
 runs at application level. If you need to use modules
 you have to import them within the test function.
 
-Another possibility to pass in data into the AppTest is to use
+Data can be passed into the AppTest using 
 the ``setup_class`` method of the AppTest. All wrapped objects that are
 attached to the class there and start with ``w_`` can be accessed
 via self (but without the ``w_``) in the actual test method. An example::
@@ -922,6 +922,46 @@
 
 .. _`run the tests as usual`:
 
+Another possibility is to use cls.space.appexec, for example::
+
+    class AppTestSomething(object):
+        def setup_class(cls):
+            arg = 2
+            cls.w_result = cls.space.appexec([cls.space.wrap(arg)], """(arg):
+                return arg ** 6
+                """)
+
+        def test_power(self):
+            assert self.result == 2 ** 6
+
+which executes the code string function with the given arguments at app level.
+Note the use of ``w_result`` in ``setup_class`` but ``self.result`` in the test.
+Here is how to define an app-level class in ``setup_class`` that can be used
+in subsequent tests::
+
+    class AppTestSet(object):
+        def setup_class(cls):
+            w_fakeint = cls.space.appexec([], """():
+                class FakeInt(object):
+                    def __init__(self, value):
+                        self.value = value
+                    def __hash__(self):
+                        return hash(self.value)
+
+                    def __eq__(self, other):
+                        if other == self.value:
+                            return True
+                        return False
+                return FakeInt
+                """)
+            cls.w_FakeInt = w_fakeint
+
+        def test_fakeint(self):
+            f1 = self.FakeInt(4)
+            assert f1 == 4
+            assert hash(f1) == hash(4)
+
+
 Command line tool test_all
 --------------------------
 
diff --git a/pypy/doc/config/objspace.usemodules._pypyjson.txt b/pypy/doc/config/objspace.usemodules._pypyjson.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._pypyjson.txt
@@ -0,0 +1,1 @@
+RPython speedups for the stdlib json module
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -96,8 +96,21 @@
 Does PyPy have a GIL?  Why?
 -------------------------------------------------
 
-Yes, PyPy has a GIL.  Removing the GIL is very hard.  The first problem
-is that our garbage collectors are not re-entrant.
+Yes, PyPy has a GIL.  Removing the GIL is very hard.  The problems are
+essentially the same as with CPython (including the fact that our
+garbage collectors are not thread-safe so far).  Fixing it is possible,
+as shown by Jython and IronPython, but difficult.  It would require
+adapting the whole source code of PyPy, including subtle decisions about
+whether some effects are ok or not for the user (i.e. the Python
+programmer).
+
+Instead, since 2012, there is work going on on a still very experimental
+Software Transactional Memory (STM) version of PyPy.  This should give
+an alternative PyPy which internally has no GIL, while at the same time
+continuing to give the Python programmer the complete illusion of having
+one.  It would in fact push forward *more* GIL-ish behavior, like
+declaring that some sections of the code should run without releasing
+the GIL in the middle (these are called *atomic sections* in STM).
 
 ------------------------------------------
 How do I write extension modules for PyPy?
@@ -306,7 +319,7 @@
 
 No, and you shouldn't try.  First and foremost, RPython is a language
 designed for writing interpreters. It is a restricted subset of
-Python.  If you program is not an interpreter but tries to do "real
+Python.  If your program is not an interpreter but tries to do "real
 things", like use *any* part of the standard Python library or *any*
 3rd-party library, then it is not RPython to start with.  You should
 only look at RPython if you try to `write your own interpreter`__.
@@ -322,8 +335,35 @@
 Yes, it is possible with enough effort to compile small self-contained
 pieces of RPython code doing a few performance-sensitive things.  But
 this case is not interesting for us.  If you needed to rewrite the code
-in RPython, you could as well have rewritten it in C for example.  The
-latter is a much more supported, much more documented language `:-)`
+in RPython, you could as well have rewritten it in C or C++ or Java for
+example.  These are much more supported, much more documented languages
+`:-)`
+
+  *The above paragraphs are not the whole truth.  It* is *true that there
+  are cases where writing a program as RPython gives you substantially
+  better speed than running it on top of PyPy.  However, the attitude of
+  the core group of people behind PyPy is to answer: "then report it as a
+  performance bug against PyPy!".*
+
+  *Here is a more diluted way to put it.  The "No, don't!" above is a
+  general warning we give to new people.  They are likely to need a lot
+  of help from* some *source, because RPython is not so simple nor
+  extensively documented; but at the same time, we, the pypy core group
+  of people, are not willing to invest time in supporting 3rd-party
+  projects that do very different things than interpreters for dynamic
+  languages --- just because we have other interests and there are only
+  so many hours a day.  So as a summary I believe it is only fair to
+  attempt to point newcomers at existing alternatives, which are more
+  mainstream and where they will get help from many people.*
+
+  *If anybody seriously wants to promote RPython anyway, he is welcome
+  to: we won't actively resist such a plan.  There are a lot of things
+  that could be done to make RPython a better Java-ish language for
+  example, starting with supporting non-GIL-based multithreading, but we
+  don't implement them because they have little relevance to us.  This
+  is open source, which means that anybody is free to promote and
+  develop anything; but it also means that you must let us choose* not
+  *to go into that direction ourselves.*
 
 ---------------------------------------------------
 Which backends are there for the RPython toolchain?
diff --git a/pypy/doc/whatsnew-2.1.rst b/pypy/doc/whatsnew-2.1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/whatsnew-2.1.rst
@@ -0,0 +1,78 @@
+======================
+What's new in PyPy 2.1
+======================
+
+.. this is a revision shortly after release-2.0
+.. startrev: a13c07067613
+
+.. branch: ndarray-ptp
+put and array.put
+
+.. branch: numpy-pickle
+Pickling of numpy arrays and dtypes (including record dtypes)
+
+.. branch: remove-array-smm
+Remove multimethods in the arraymodule
+
+.. branch: callback-stacklet
+Fixed bug when switching stacklets from a C callback
+
+.. branch: remove-set-smm
+Remove multi-methods on sets
+
+.. branch: numpy-subarrays
+Implement subarrays for numpy
+
+.. branch: remove-dict-smm
+Remove multi-methods on dict
+
+.. branch: remove-list-smm-2
+Remove remaining multi-methods on list
+
+.. branch: arm-stacklet
+Stacklet support for ARM, enables _continuation support
+
+.. branch: remove-tuple-smm
+Remove multi-methods on tuple
+
+.. branch: remove-iter-smm
+Remove multi-methods on iterators
+
+.. branch: emit-call-x86
+.. branch: emit-call-arm
+
+.. branch: on-abort-resops
+Added list of resops to the pypyjit on_abort hook.
+
+.. branch: logging-perf
+Speeds up the stdlib logging module
+
+.. branch: operrfmt-NT
+Adds a couple convenient format specifiers to operationerrfmt
+
+.. branch: win32-fixes3
+Skip and fix some non-translated (own) tests for win32 builds
+
+.. branch: ctypes-byref
+Add the '_obj' attribute on ctypes pointer() and byref() objects
+
+.. branch: argsort-segfault
+Fix a segfault in argsort when sorting by chunks on multidim numpypy arrays (mikefc)
+
+.. branch: dtype-isnative
+.. branch: ndarray-round
+
+.. branch: faster-str-of-bigint
+Improve performance of str(long).
+
+.. branch: ndarray-view
+Add view to ndarray and zeroD arrays, not on dtype scalars yet
+
+.. branch: numpypy-segfault
+fix segfault caused by iterating over empty ndarrays
+
+.. branch: identity-set
+Faster sets for objects
+
+.. branch: inline-identityhash
+Inline the fast path of id() and hash()
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -5,6 +5,9 @@
 .. this is a revision shortly after release-2.0
 .. startrev: a13c07067613
 
+.. branch: ndarray-ptp
+put and array.put
+
 .. branch: numpy-pickle
 Pickling of numpy arrays and dtypes (including record dtypes)
 
@@ -65,6 +68,9 @@
 .. branch: ndarray-view
 Add view to ndarray and zeroD arrays, not on dtype scalars yet
 
+.. branch: numpypy-segfault
+fix segfault caused by iterating over empty ndarrays
+
 .. branch: identity-set
 Faster sets for objects
 
diff --git a/pypy/module/_pypyjson/__init__.py b/pypy/module/_pypyjson/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/__init__.py
@@ -0,0 +1,10 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+class Module(MixedModule):
+    """fast json implementation"""
+
+    appleveldefs = {}
+
+    interpleveldefs = {
+        'loads' : 'interp_decoder.loads',
+        }
diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/interp_decoder.py
@@ -0,0 +1,404 @@
+import sys
+import math
+from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.objectmodel import specialize
+from rpython.rlib import rfloat
+from rpython.rtyper.lltypesystem import lltype, rffi
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.gateway import unwrap_spec
+from pypy.interpreter import unicodehelper
+from rpython.rtyper.annlowlevel import llstr, hlunicode
+
+OVF_DIGITS = len(str(sys.maxint))
+
+def is_whitespace(ch):
+    return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n'
+
+# precomputing negative powers of 10 is MUCH faster than using e.g. math.pow
+# at runtime
+NEG_POW_10 = [10.0**-i for i in range(16)]
+def neg_pow_10(x, exp):
+    if exp >= len(NEG_POW_10):
+        return 0.0
+    return x * NEG_POW_10[exp]
+
+def strslice2unicode_latin1(s, start, end):
+    """
+    Convert s[start:end] to unicode. s is supposed to be an RPython string
+    encoded in latin-1, which means that the numeric value of each char is the
+    same as the corresponding unicode code point.
+
+    Internally it's implemented at the level of low-level helpers, to avoid
+    the extra copy we would need if we take the actual slice first.
+    
+    No bound checking is done, use carefully.
+    """
+    from rpython.rtyper.annlowlevel import llstr, hlunicode
+    from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE
+    from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar
+    length = end-start
+    ll_s = llstr(s)
+    ll_res = malloc(UNICODE, length)
+    ll_res.hash = 0
+    for i in range(length):
+        ch = ll_s.chars[start+i]
+        ll_res.chars[i] = cast_primitive(UniChar, ch)
+    return hlunicode(ll_res)
+
+TYPE_UNKNOWN = 0
+TYPE_STRING = 1
+class JSONDecoder(object):
+    def __init__(self, space, s):
+        self.space = space
+        self.s = s
+        # we put our string in a raw buffer so:
+        # 1) we automatically get the '\0' sentinel at the end of the string,
+        #    which means that we never have to check for the "end of string"
+        # 2) we can pass the buffer directly to strtod
+        self.ll_chars = rffi.str2charp(s)
+        self.end_ptr = lltype.malloc(rffi.CCHARPP.TO, 1, flavor='raw')
+        self.pos = 0
+        self.last_type = TYPE_UNKNOWN
+
+    def close(self):
+        rffi.free_charp(self.ll_chars)
+        lltype.free(self.end_ptr, flavor='raw')
+
+    def getslice(self, start, end):
+        assert start >= 0
+        assert end >= 0
+        return self.s[start:end]
+
+    def skip_whitespace(self, i):
+        while True:
+            ch = self.ll_chars[i]
+            if is_whitespace(ch):
+                i+=1
+            else:
+                break
+        return i
+
+    @specialize.arg(1)
+    def _raise(self, msg, *args):
+        raise operationerrfmt(self.space.w_ValueError, msg, *args)
+
+    def decode_any(self, i):
+        i = self.skip_whitespace(i)
+        ch = self.ll_chars[i]
+        if ch == '"':
+            return self.decode_string(i+1)
+        elif ch == '[':
+            return self.decode_array(i+1)
+        elif ch == '{':
+            return self.decode_object(i+1)
+        elif ch == 'n':
+            return self.decode_null(i+1)
+        elif ch == 't':
+            return self.decode_true(i+1)
+        elif ch == 'f':
+            return self.decode_false(i+1)
+        elif ch == 'I':
+            return self.decode_infinity(i+1)
+        elif ch == 'N':
+            return self.decode_nan(i+1)
+        elif ch == '-':
+            if self.ll_chars[i+1] == 'I':
+                return self.decode_infinity(i+2, sign=-1)
+            return self.decode_numeric(i)
+        elif ch.isdigit():
+            return self.decode_numeric(i)
+        else:
+            self._raise("No JSON object could be decoded: unexpected '%s' at char %d",
+                        ch, self.pos)
+
+    def decode_null(self, i):
+        if (self.ll_chars[i]   == 'u' and
+            self.ll_chars[i+1] == 'l' and
+            self.ll_chars[i+2] == 'l'):
+            self.pos = i+3
+            return self.space.w_None
+        self._raise("Error when decoding null at char %d", i)
+
+    def decode_true(self, i):
+        if (self.ll_chars[i]   == 'r' and
+            self.ll_chars[i+1] == 'u' and
+            self.ll_chars[i+2] == 'e'):
+            self.pos = i+3
+            return self.space.w_True
+        self._raise("Error when decoding true at char %d", i)
+
+    def decode_false(self, i):
+        if (self.ll_chars[i]   == 'a' and
+            self.ll_chars[i+1] == 'l' and
+            self.ll_chars[i+2] == 's' and
+            self.ll_chars[i+3] == 'e'):
+            self.pos = i+4
+            return self.space.w_False
+        self._raise("Error when decoding false at char %d", i)
+
+    def decode_infinity(self, i, sign=1):
+        if (self.ll_chars[i]   == 'n' and
+            self.ll_chars[i+1] == 'f' and
+            self.ll_chars[i+2] == 'i' and
+            self.ll_chars[i+3] == 'n' and
+            self.ll_chars[i+4] == 'i' and
+            self.ll_chars[i+5] == 't' and
+            self.ll_chars[i+6] == 'y'):
+            self.pos = i+7
+            return self.space.wrap(rfloat.INFINITY * sign)
+        self._raise("Error when decoding Infinity at char %d", i)
+
+    def decode_nan(self, i):
+        if (self.ll_chars[i]   == 'a' and
+            self.ll_chars[i+1] == 'N'):
+            self.pos = i+2
+            return self.space.wrap(rfloat.NAN)
+        self._raise("Error when decoding NaN at char %d", i)
+
+    def decode_numeric(self, i):
+        start = i
+        i, ovf_maybe, intval = self.parse_integer(i)
+        #
+        # check for the optional fractional part
+        ch = self.ll_chars[i]
+        if ch == '.':
+            if not self.ll_chars[i+1].isdigit():
+                self._raise("Expected digit at char %d", i+1)
+            return self.decode_float(start)
+        elif ch == 'e' or ch == 'E':
+            return self.decode_float(start)
+        elif ovf_maybe:
+            return self.decode_int_slow(start)
+
+        self.pos = i
+        return self.space.wrap(intval)
+
+    def decode_float(self, i):
+        from rpython.rlib import rdtoa
+        start = rffi.ptradd(self.ll_chars, i)
+        floatval = rdtoa.dg_strtod(start, self.end_ptr)
+        diff = rffi.cast(rffi.LONG, self.end_ptr[0]) - rffi.cast(rffi.LONG, start)
+        self.pos = i + diff
+        return self.space.wrap(floatval)
+
+    def decode_int_slow(self, i):
+        start = i
+        if self.ll_chars[i] == '-':
+            i += 1
+        while self.ll_chars[i].isdigit():
+            i += 1
+        s = self.getslice(start, i)
+        self.pos = i
+        return self.space.call_function(self.space.w_int, self.space.wrap(s))
+
+    def parse_integer(self, i):
+        "Parse a decimal number with an optional minus sign"
+        sign = 1
+        # parse the sign
+        if self.ll_chars[i] == '-':
+            sign = -1
+            i += 1
+        elif self.ll_chars[i] == '+':
+            i += 1
+        #
+        if self.ll_chars[i] == '0':
+            i += 1
+            return i, False, 0
+
+        intval = 0
+        start = i
+        while True:
+            ch = self.ll_chars[i]
+            if ch.isdigit():
+                intval = intval*10 + ord(ch)-ord('0')
+                i += 1
+            else:
+                break
+        count = i - start
+        if count == 0:
+            self._raise("Expected digit at char %d", i)
+        # if the number has more digits than OVF_DIGITS, it might have
+        # overflowed
+        ovf_maybe = (count >= OVF_DIGITS)
+        return i, ovf_maybe, sign * intval
+    parse_integer._always_inline_ = True
+
+    def decode_array(self, i):
+        w_list = self.space.newlist([])
+        start = i
+        count = 0
+        i = self.skip_whitespace(start)
+        if self.ll_chars[i] == ']':
+            self.pos = i+1
+            return w_list
+        #
+        while True:
+            w_item = self.decode_any(i)
+            i = self.pos
+            self.space.call_method(w_list, 'append', w_item)
+            i = self.skip_whitespace(i)
+            ch = self.ll_chars[i]
+            i += 1
+            if ch == ']':
+                self.pos = i
+                return w_list
+            elif ch == ',':
+                pass
+            elif ch == '\0':
+                self._raise("Unterminated array starting at char %d", start)
+            else:
+                self._raise("Unexpected '%s' when decoding array (char %d)",
+                            ch, self.pos)
+
+    def decode_object(self, i):
+        start = i
+        w_dict = self.space.newdict()
+        #
+        i = self.skip_whitespace(i)
+        if self.ll_chars[i] == '}':
+            self.pos = i+1
+            return w_dict
+        #
+        while True:
+            # parse a key: value
+            self.last_type = TYPE_UNKNOWN
+            w_name = self.decode_any(i)
+            if self.last_type != TYPE_STRING:
+                self._raise("Key name must be string for object starting at char %d", start)
+            i = self.skip_whitespace(self.pos)
+            ch = self.ll_chars[i]
+            if ch != ':':
+                self._raise("No ':' found at char %d", i)
+            i += 1
+            i = self.skip_whitespace(i)
+            #
+            w_value = self.decode_any(i)
+            self.space.setitem(w_dict, w_name, w_value)
+            i = self.skip_whitespace(self.pos)
+            ch = self.ll_chars[i]
+            i += 1
+            if ch == '}':
+                self.pos = i
+                return w_dict
+            elif ch == ',':
+                pass
+            elif ch == '\0':
+                self._raise("Unterminated object starting at char %d", start)
+            else:
+                self._raise("Unexpected '%s' when decoding object (char %d)",
+                            ch, self.pos)
+
+
+    def decode_string(self, i):
+        start = i
+        bits = 0
+        while True:
+            # this loop is a fast path for strings which do not contain escape
+            # characters
+            ch = self.ll_chars[i]
+            i += 1
+            bits |= ord(ch)
+            if ch == '"':
+                if bits & 0x80:
+                    # the 8th bit is set, it's a utf8 string
+                    content_utf8 = self.getslice(start, i-1)
+                    content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
+                else:
+                    # ascii only, fast path (ascii is a strict subset of
+                    # latin1, and we already checked that all the chars are <
+                    # 128)
+                    content_unicode = strslice2unicode_latin1(self.s, start, i-1)
+                self.last_type = TYPE_STRING
+                self.pos = i
+                return self.space.wrap(content_unicode)
+            elif ch == '\\':
+                content_so_far = self.getslice(start, i-1)
+                self.pos = i-1
+                return self.decode_string_escaped(start, content_so_far)
+            elif ch == '\0':
+                self._raise("Unterminated string starting at char %d", start)
+
+
+    def decode_string_escaped(self, start, content_so_far):
+        builder = StringBuilder(len(content_so_far)*2) # just an estimate
+        builder.append(content_so_far)
+        i = self.pos
+        while True:
+            ch = self.ll_chars[i]
+            i += 1
+            if ch == '"':
+                content_utf8 = builder.build()
+                content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
+                self.last_type = TYPE_STRING
+                self.pos = i
+                return self.space.wrap(content_unicode)
+            elif ch == '\\':
+                i = self.decode_escape_sequence(i, builder)
+            elif ch == '\0':
+                self._raise("Unterminated string starting at char %d", start)
+            else:
+                builder.append_multiple_char(ch, 1) # we should implement append_char
+
+    def decode_escape_sequence(self, i, builder):
+        ch = self.ll_chars[i]
+        i += 1
+        put = builder.append_multiple_char
+        if ch == '\\':  put('\\', 1)
+        elif ch == '"': put('"' , 1)
+        elif ch == '/': put('/' , 1)
+        elif ch == 'b': put('\b', 1)
+        elif ch == 'f': put('\f', 1)
+        elif ch == 'n': put('\n', 1)
+        elif ch == 'r': put('\r', 1)
+        elif ch == 't': put('\t', 1)
+        elif ch == 'u':
+            return self.decode_escape_sequence_unicode(i, builder)
+        else:
+            self._raise("Invalid \\escape: %s (char %d)", ch, self.pos-1)
+        return i
+
+    def decode_escape_sequence_unicode(self, i, builder):
+        # at this point we are just after the 'u' of the \u1234 sequence.
+        start = i
+        i += 4
+        hexdigits = self.getslice(start, i)
+        try:
+            val = int(hexdigits, 16)
+            if val & 0xfc00 == 0xd800:
+                # surrogate pair
+                val = self.decode_surrogate_pair(i, val)
+                i += 6
+        except ValueError:
+            self._raise("Invalid \uXXXX escape (char %d)", i-1)
+            return # help the annotator to know that we'll never go beyond
+                   # this point
+        #
+        uchr = unichr(val)
+        utf8_ch = unicodehelper.encode_utf8(self.space, uchr)
+        builder.append(utf8_ch)
+        return i
+
+    def decode_surrogate_pair(self, i, highsurr):
+        if self.ll_chars[i] != '\\' or self.ll_chars[i+1] != 'u':
+            self._raise("Unpaired high surrogate at char %d", i)
+        i += 2
+        hexdigits = self.getslice(i, i+4)
+        lowsurr = int(hexdigits, 16) # the possible ValueError is caught by the caller
+        return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00))
+
+def loads(space, w_s):
+    if space.isinstance_w(w_s, space.w_unicode):
+        raise OperationError(space.w_TypeError,
+                             space.wrap("Expected utf8-encoded str, got unicode"))
+    s = space.str_w(w_s)
+    decoder = JSONDecoder(space, s)
+    try:
+        w_res = decoder.decode_any(0)
+        i = decoder.skip_whitespace(decoder.pos)
+        if i < len(s):
+            start = i
+            end = len(s) - 1
+            raise operationerrfmt(space.w_ValueError, "Extra data: char %d - %d", start, end)
+        return w_res
+    finally:
+        decoder.close()
diff --git a/pypy/module/_pypyjson/targetjson.py b/pypy/module/_pypyjson/targetjson.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/targetjson.py
@@ -0,0 +1,143 @@
+import sys
+import py
+ROOT = py.path.local(__file__).dirpath('..', '..', '..')
+sys.path.insert(0, str(ROOT))
+
+import time
+from rpython.rlib.streamio import open_file_as_stream
+from pypy.interpreter.error import OperationError
+from pypy.module._pypyjson.interp_decoder import loads
+
+
+
+## MSG = open('msg.json').read()
+
+class W_Root(object):
+    pass
+
+class W_Dict(W_Root):
+    def __init__(self):
+        self.dictval = {}
+
+class W_Unicode(W_Root):
+    def __init__(self, x):
+        self.unival = x
+
+class W_String(W_Root):
+    def __init__(self, x):
+        self.strval = x
+
+class W_Int(W_Root):
+    def __init__(self, x):
+        self.intval = x
+
+class W_Float(W_Root):
+    def __init__(self, x):
+        self.floatval = x
+
+class W_List(W_Root):
+    def __init__(self):
+        self.listval = []
+
+class W_Singleton(W_Root):
+    def __init__(self, name):
+        self.name = name
+
+class FakeSpace(object):
+
+    w_None = W_Singleton('None')
+    w_True = W_Singleton('True')
+    w_False = W_Singleton('False')
+    w_ValueError = W_Singleton('ValueError')
+    w_UnicodeDecodeError = W_Singleton('UnicodeDecodeError')
+    w_unicode = W_Unicode
+    w_int = W_Int
+    w_float = W_Float
+
+    def newtuple(self, items):
+        return None
+
+    def newdict(self):
+        return W_Dict()
+
+    def newlist(self, items):
+        return W_List()
+
+    def isinstance_w(self, w_x, w_type):
+        return isinstance(w_x, w_type)
+
+    def str_w(self, w_x):
+        assert isinstance(w_x, W_String)
+        return w_x.strval
+
+    def call_method(self, obj, name, arg):
+        assert name == 'append'
+        assert isinstance(obj, W_List)
+        obj.listval.append(arg)
+    call_method._dont_inline_ = True
+
+    def call_function(self, w_func, *args_w):
+        return self.w_None # XXX
+
+    def setitem(self, d, key, value):
+        assert isinstance(d, W_Dict)
+        assert isinstance(key, W_Unicode)
+        d.dictval[key.unival] = value
+
+    def wrapunicode(self, x):
+        return W_Unicode(x)
+
+    def wrapint(self, x):
+        return W_Int(x)
+
+    def wrapfloat(self, x):
+        return W_Float(x)
+    
+    def wrap(self, x):
+        if isinstance(x, int):
+            return W_Int(x)
+        elif isinstance(x, float):
+            return W_Float(x)
+        ## elif isinstance(x, str):
+        ##     assert False
+        else:
+            return W_Unicode(unicode(x))
+    wrap._annspecialcase_ = "specialize:argtype(1)"
+
+
+fakespace = FakeSpace()
+
+def myloads(msg):
+    return loads(fakespace, W_String(msg))
+    
+
+def bench(title, N, fn, arg):
+    a = time.clock()
+    for i in range(N):
+        res = fn(arg)
+    b = time.clock()
+    print title, (b-a) / N * 1000
+
+def entry_point(argv):
+    if len(argv) != 3:
+        print 'Usage: %s FILE n' % argv[0]
+        return 1
+    filename = argv[1]
+    N = int(argv[2])
+    f = open_file_as_stream(filename)
+    msg = f.readall()
+    
+    try:
+        bench('loads     ', N, myloads,  msg)
+    except OperationError, e:
+        print 'Error', e._compute_value(fakespace)
+        
+    return 0
+
+# _____ Define and setup target ___
+
+def target(*args):
+    return entry_point, None
+
+if __name__ == '__main__':
+    entry_point(sys.argv)
diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_pypyjson/test/test__pypyjson.py
@@ -0,0 +1,188 @@
+# -*- encoding: utf-8 -*-
+import py
+from pypy.module._pypyjson.interp_decoder import JSONDecoder
+
+def test_skip_whitespace():
+    s = '   hello   '
+    dec = JSONDecoder('fake space', s)
+    assert dec.pos == 0
+    assert dec.skip_whitespace(0) == 3
+    assert dec.skip_whitespace(3) == 3
+    assert dec.skip_whitespace(8) == len(s)
+    dec.close()
+
+    
+
+class AppTest(object):
+    spaceconfig = {"objspace.usemodules._pypyjson": True}
+
+    def test_raise_on_unicode(self):
+        import _pypyjson
+        raises(TypeError, _pypyjson.loads, u"42")
+
+
+    def test_decode_constants(self):
+        import _pypyjson
+        assert _pypyjson.loads('null') is None
+        raises(ValueError, _pypyjson.loads, 'nul')
+        raises(ValueError, _pypyjson.loads, 'nu')
+        raises(ValueError, _pypyjson.loads, 'n')
+        raises(ValueError, _pypyjson.loads, 'nuXX')
+        #
+        assert _pypyjson.loads('true') is True
+        raises(ValueError, _pypyjson.loads, 'tru')
+        raises(ValueError, _pypyjson.loads, 'tr')
+        raises(ValueError, _pypyjson.loads, 't')
+        raises(ValueError, _pypyjson.loads, 'trXX')
+        #
+        assert _pypyjson.loads('false') is False
+        raises(ValueError, _pypyjson.loads, 'fals')
+        raises(ValueError, _pypyjson.loads, 'fal')
+        raises(ValueError, _pypyjson.loads, 'fa')
+        raises(ValueError, _pypyjson.loads, 'f')
+        raises(ValueError, _pypyjson.loads, 'falXX')
+        
+
+    def test_decode_string(self):
+        import _pypyjson
+        res = _pypyjson.loads('"hello"')
+        assert res == u'hello'
+        assert type(res) is unicode
+
+    def test_decode_string_utf8(self):
+        import _pypyjson
+        s = u'àèìòù'
+        res = _pypyjson.loads('"%s"' % s.encode('utf-8'))
+        assert res == s
+
+    def test_skip_whitespace(self):
+        import _pypyjson
+        s = '   "hello"   '
+        assert _pypyjson.loads(s) == u'hello'
+        s = '   "hello"   extra'
+        raises(ValueError, "_pypyjson.loads(s)")
+
+    def test_unterminated_string(self):
+        import _pypyjson
+        s = '"hello' # missing the trailing "
+        raises(ValueError, "_pypyjson.loads(s)")
+
+    def test_escape_sequence(self):
+        import _pypyjson
+        assert _pypyjson.loads(r'"\\"') == u'\\'
+        assert _pypyjson.loads(r'"\""') == u'"'
+        assert _pypyjson.loads(r'"\/"') == u'/'       
+        assert _pypyjson.loads(r'"\b"') == u'\b'
+        assert _pypyjson.loads(r'"\f"') == u'\f'
+        assert _pypyjson.loads(r'"\n"') == u'\n'
+        assert _pypyjson.loads(r'"\r"') == u'\r'
+        assert _pypyjson.loads(r'"\t"') == u'\t'
+
+    def test_escape_sequence_in_the_middle(self):
+        import _pypyjson
+        s = r'"hello\nworld"'
+        assert _pypyjson.loads(s) == "hello\nworld"
+
+    def test_unterminated_string_after_escape_sequence(self):
+        import _pypyjson
+        s = r'"hello\nworld' # missing the trailing "
+        raises(ValueError, "_pypyjson.loads(s)")
+        
+    def test_escape_sequence_unicode(self):
+        import _pypyjson
+        s = r'"\u1234"'
+        assert _pypyjson.loads(s) == u'\u1234'
+
+    def test_invalid_utf_8(self):
+        import _pypyjson
+        s = '"\xe0"' # this is an invalid UTF8 sequence inside a string
+        raises(UnicodeDecodeError, "_pypyjson.loads(s)")
+
+    def test_decode_numeric(self):
+        import sys
+        import _pypyjson
+        def check(s, val):
+            res = _pypyjson.loads(s)
+            assert type(res) is type(val)
+            assert res == val
+        #
+        check('42', 42)
+        check('-42', -42)
+        check('42.123', 42.123)
+        check('42E0', 42.0)
+        check('42E3', 42000.0)
+        check('42E-1', 4.2)
+        check('42E+1', 420.0)
+        check('42.123E3', 42123.0)
+        check('0', 0)
+        check('-0', 0)
+        check('0.123', 0.123)
+        check('0E3', 0.0)
+        check('5E0001', 50.0)
+        check(str(1 << 32), 1 << 32)
+        check(str(1 << 64), 1 << 64)
+        #
+        x = str(sys.maxint+1) + '.123'
+        check(x, float(x))
+        x = str(sys.maxint+1) + 'E1'
+        check(x, float(x))
+        x = str(sys.maxint+1) + 'E-1'
+        check(x, float(x))
+        #
+        check('1E400', float('inf'))
+        ## # these are non-standard but supported by CPython json
+        check('Infinity', float('inf'))
+        check('-Infinity', float('-inf'))
+
+    def test_nan(self):
+        import math
+        import _pypyjson
+        res = _pypyjson.loads('NaN')
+        assert math.isnan(res)
+
+    def test_decode_numeric_invalid(self):
+        import _pypyjson
+        def error(s):
+            raises(ValueError, _pypyjson.loads, s)
+        #
+        error('  42   abc')
+        error('.123')
+        error('+123')
+        error('12.')
+        error('12.-3')
+        error('12E')
+        error('12E-')
+        error('0123') # numbers can't start with 0
+
+    def test_decode_object(self):
+        import _pypyjson
+        assert _pypyjson.loads('{}') == {}
+        assert _pypyjson.loads('{  }') == {}
+        #
+        s = '{"hello": "world", "aaa": "bbb"}'
+        assert _pypyjson.loads(s) == {'hello': 'world',
+                                      'aaa': 'bbb'}
+        raises(ValueError, _pypyjson.loads, '{"key"')
+        raises(ValueError, _pypyjson.loads, '{"key": 42')
+
+    def test_decode_object_nonstring_key(self):
+        import _pypyjson
+        raises(ValueError, "_pypyjson.loads('{42: 43}')")
+        
+    def test_decode_array(self):
+        import _pypyjson
+        assert _pypyjson.loads('[]') == []
+        assert _pypyjson.loads('[  ]') == []
+        assert _pypyjson.loads('[1]') == [1]
+        assert _pypyjson.loads('[1, 2]') == [1, 2]
+        raises(ValueError, "_pypyjson.loads('[1: 2]')")
+        raises(ValueError, "_pypyjson.loads('[1, 2')")
+        raises(ValueError, """_pypyjson.loads('["extra comma",]')""")
+        
+    def test_unicode_surrogate_pair(self):
+        import _pypyjson
+        expected = u'z\U0001d120x'
+        res = _pypyjson.loads('"z\\ud834\\udd20x"')
+        assert res == expected
+
+
diff --git a/pypy/module/micronumpy/interp_arrayops.py b/pypy/module/micronumpy/interp_arrayops.py
--- a/pypy/module/micronumpy/interp_arrayops.py
+++ b/pypy/module/micronumpy/interp_arrayops.py
@@ -65,7 +65,7 @@
            [ 3.,  4., -1.],
            [-1., -1., -1.]])
 
-    
+
     NOTE: support for not passing x and y is unsupported
     """
     if space.is_none(w_y):
@@ -122,10 +122,10 @@
             for f in dtype.fields:
                 if f not in a_dt.fields or \
                              dtype.fields[f] != a_dt.fields[f]:
-                    raise OperationError(space.w_TypeError, 
+                    raise OperationError(space.w_TypeError,
                                space.wrap("record type mismatch"))
         elif dtype.is_record_type() or a_dt.is_record_type():
-            raise OperationError(space.w_TypeError, 
+            raise OperationError(space.w_TypeError,
                         space.wrap("invalid type promotion"))
         dtype = interp_ufuncs.find_binop_result_dtype(space, dtype,
                                                       arr.get_dtype())
diff --git a/pypy/module/micronumpy/iter.py b/pypy/module/micronumpy/iter.py
--- a/pypy/module/micronumpy/iter.py
+++ b/pypy/module/micronumpy/iter.py
@@ -46,6 +46,7 @@
      calculate_slice_strides
 from pypy.module.micronumpy.base import W_NDimArray
 from pypy.module.micronumpy.arrayimpl import base
+from pypy.module.micronumpy.support import product
 from rpython.rlib import jit
 
 # structures to describe slicing
@@ -225,7 +226,7 @@
         self.shape = shape
         self.offset = start
         self.shapelen = len(shape)
-        self._done = False
+        self._done = self.shapelen == 0 or product(shape) == 0
         self.strides = strides
         self.backstrides = backstrides
         self.size = array.size
@@ -284,7 +285,7 @@
             self.backstrides = backstrides[:dim] + [0] + backstrides[dim:]
         self.first_line = True
         self.indices = [0] * len(shape)
-        self._done = False
+        self._done = array.get_size() == 0
         self.offset = array.start
         self.dim = dim
         self.array = array
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -293,6 +293,14 @@
         b = array(a, copy=False, ndmin=4)
         b[0,0,0,0] = 0
         assert a[0, 0] == 0
+        a = array([[[]]])
+        # Simulate tiling an empty array, really tests repeat, reshape
+        # b = tile(a, (3, 2, 5))
+        reps = (3, 4, 5)
+        c = array(a, copy=False, subok=True, ndmin=len(reps))
+        d = c.reshape(3, 4, 0)
+        e = d.repeat(3, 0)
+        assert e.shape == (9, 4, 0)
 
     def test_type(self):
         from numpypy import array
@@ -2562,6 +2570,9 @@
         a = array(range(100) + range(100) + range(100))
         b = a.argsort()
         assert (b[:3] == [0, 100, 200]).all()
+        a = array([[[]]]).reshape(3,4,0)
+        b = a.argsort()
+        assert b.size == 0
 
     def test_argsort_random(self):
         from numpypy import array
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -131,18 +131,19 @@
     def has_id(self, id):
         return id in self.ids
 
-    def _ops_for_chunk(self, chunk, include_debug_merge_points):
+    def _ops_for_chunk(self, chunk, include_guard_not_invalidated):
         for op in chunk.operations:
-            if op.name != 'debug_merge_point' or include_debug_merge_points:
+            if op.name != 'debug_merge_point' and \
+                (op.name != 'guard_not_invalidated' or include_guard_not_invalidated):
                 yield op
 
-    def _allops(self, include_debug_merge_points=False, opcode=None):
+    def _allops(self, opcode=None, include_guard_not_invalidated=True):
         opcode_name = opcode
         for chunk in self.flatten_chunks():
             opcode = chunk.getopcode()
             if opcode_name is None or \
                    (opcode and opcode.__class__.__name__ == opcode_name):
-                for op in self._ops_for_chunk(chunk, include_debug_merge_points):
+                for op in self._ops_for_chunk(chunk, include_guard_not_invalidated):
                     yield op
             else:
                for op in  chunk.operations:
@@ -162,15 +163,15 @@
     def print_ops(self, *args, **kwds):
         print self.format_ops(*args, **kwds)
 
-    def _ops_by_id(self, id, include_debug_merge_points=False, opcode=None):
+    def _ops_by_id(self, id, include_guard_not_invalidated=True, opcode=None):
         opcode_name = opcode
         target_opcodes = self.ids[id]
-        loop_ops = self.allops(include_debug_merge_points, opcode)
+        loop_ops = self.allops(opcode)
         for chunk in self.flatten_chunks():
             opcode = chunk.getopcode()
             if opcode in target_opcodes and (opcode_name is None or
                                              opcode.__class__.__name__ == opcode_name):
-                for op in self._ops_for_chunk(chunk, include_debug_merge_points):
+                for op in self._ops_for_chunk(chunk, include_guard_not_invalidated):
                     if op in loop_ops:
                         yield op
 
diff --git a/pypy/module/pypyjit/test_pypy_c/test_containers.py b/pypy/module/pypyjit/test_pypy_c/test_containers.py
--- a/pypy/module/pypyjit/test_pypy_c/test_containers.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_containers.py
@@ -223,5 +223,5 @@
         log = self.run(main, [1000])
         assert log.result == main(1000)
         loop, = log.loops_by_filename(self.filepath)
-        ops = loop.ops_by_id('getitem')
+        ops = loop.ops_by_id('getitem', include_guard_not_invalidated=False)
         assert log.opnames(ops) == []
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -80,7 +80,7 @@
             i23 = strgetitem(p10, i19)
             p25 = newstr(1)
             strsetitem(p25, 0, i23)
-            p93 = call(ConstClass(fromstr), p25, 16, ConstPtr(ptr70), descr=<Callr . rir EF=3>)
+            p93 = call(ConstClass(fromstr), p25, 16, descr=<Callr . ri EF=3>)
             guard_no_exception(descr=...)
             i94 = call(ConstClass(rbigint.toint), p93, descr=<Calli . r EF=3>)
             guard_no_exception(descr=...)
diff --git a/pypy/tool/gdb_pypy.py b/pypy/tool/gdb_pypy.py
--- a/pypy/tool/gdb_pypy.py
+++ b/pypy/tool/gdb_pypy.py
@@ -76,18 +76,22 @@
 
     def invoke(self, arg, from_tty):
         # some magic code to automatically reload the python file while developing
-        ## from pypy.tool import gdb_pypy
-        ## reload(gdb_pypy)
-        ## gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
-        ## self.__class__ = gdb_pypy.RPyType
+        from pypy.tool import gdb_pypy
+        reload(gdb_pypy)
+        gdb_pypy.RPyType.prog2typeids = self.prog2typeids # persist the cache
+        self.__class__ = gdb_pypy.RPyType
         print self.do_invoke(arg, from_tty)
 
     def do_invoke(self, arg, from_tty):
-        obj = self.gdb.parse_and_eval(arg)
-        hdr = lookup(obj, '_gcheader')
-        tid = hdr['h_tid']
-        offset = tid & 0xFFFFFFFF # 64bit only
-        offset = int(offset) # convert from gdb.Value to python int
+        try:
+            offset = int(arg)
+        except ValueError:
+            obj = self.gdb.parse_and_eval(arg)
+            hdr = lookup(obj, '_gcheader')
+            tid = hdr['h_tid']
+            offset = tid & 0xFFFFFFFF # 64bit only
+            offset = int(offset) # convert from gdb.Value to python int
+
         typeids = self.get_typeids()
         if offset in typeids:
             return typeids[offset]
diff --git a/rpython/jit/backend/arm/test/conftest.py b/rpython/jit/backend/arm/test/conftest.py
--- a/rpython/jit/backend/arm/test/conftest.py
+++ b/rpython/jit/backend/arm/test/conftest.py
@@ -16,7 +16,5 @@
                     dest="run_translation_tests",
                     help="run tests that translate code")
 
-def pytest_collect_directory(path, parent):
-    if not cpu.startswith('arm'):
-        py.test.skip("ARM(v7) tests skipped: cpu is %r" % (cpu,))
-pytest_collect_file = pytest_collect_directory
+def pytest_ignore_collect(path, config):
+    return not cpu.startswith('arm')
diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py
--- a/rpython/translator/platform/windows.py
+++ b/rpython/translator/platform/windows.py
@@ -119,7 +119,7 @@
         # detect version of current compiler
         returncode, stdout, stderr = _run_subprocess(self.cc, '',
                                                      env=self.c_environ)
-        r = re.match(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr)
+        r = re.search(r'Microsoft.+C/C\+\+.+\s([0-9]+)\.([0-9]+).*', stderr)
         if r is not None:
             self.version = int(''.join(r.groups())) / 10 - 60
         else:


More information about the pypy-commit mailing list