[pypy-commit] pypy better-storesink: merge

Mon Sep 19 15:55:51 EDT 2016

Author: Carl Friedrich Bolz <cfbolz at gmx.de>
Branch: better-storesink
Changeset: r87236:ce6f32acf15c
Date: 2016-09-19 21:54 +0200
http://bitbucket.org/pypy/pypy/changeset/ce6f32acf15c/

Log:	merge

diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -449,6 +449,27 @@
   support (see ``multiline_input()``).  On the other hand,
   ``parse_and_bind()`` calls are ignored (issue `#2072`_).
 
+* ``sys.getsizeof()`` always raises ``TypeError``.  This is because a
+  memory profiler using this function is most likely to give results
+  inconsistent with reality on PyPy.  It would be possible to have
+  ``sys.getsizeof()`` return a number (with enough work), but that may
+  or may not represent how much memory the object uses.  It doesn't even
+  make really sense to ask how much *one* object uses, in isolation with
+  the rest of the system.  For example, instances have maps, which are
+  often shared across many instances; in this case the maps would
+  probably be ignored by an implementation of ``sys.getsizeof()``, but
+  their overhead is important in some cases if they are many instances
+  with unique maps.  Conversely, equal strings may share their internal
+  string data even if they are different objects---or empty containers
+  may share parts of their internals as long as they are empty.  Even
+  stranger, some lists create objects as you read them; if you try to
+  estimate the size in memory of ``range(10**6)`` as the sum of all
+  items' size, that operation will by itself create one million integer
+  objects that never existed in the first place.  Note that some of
+  these concerns also exist on CPython, just less so.  For this reason
+  we explicitly don't implement ``sys.getsizeof()``.
+
+
 .. _`is ignored in PyPy`: http://bugs.python.org/issue14621
 .. _`little point`: http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html
 .. _`#2072`: https://bitbucket.org/pypy/pypy/issue/2072/
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -16,3 +16,8 @@
 Improve merging of virtual states in the JIT in order to avoid jumping to the
 preamble. Accomplished by allocating virtual objects where non-virtuals are
 expected.
+
+.. branch: conditional_call_value_3
+JIT residual calls: if the called function starts with a fast-path
+like "if x.foo != 0: return x.foo", then inline the check before
+doing the CALL.  For now, string hashing is about the only case.
diff --git a/pypy/interpreter/generator.py b/pypy/interpreter/generator.py
--- a/pypy/interpreter/generator.py
+++ b/pypy/interpreter/generator.py
@@ -63,7 +63,7 @@
         """x.__iter__() <==> iter(x)"""
         return self.space.wrap(self)
 
-    def descr_send(self, w_arg=None):
+    def descr_send(self, w_arg):
         """send(arg) -> send 'arg' into generator,
 return next yielded value or raise StopIteration."""
         return self.send_ex(w_arg)
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -264,25 +264,22 @@
         try:
             executioncontext.call_trace(self)
             #
-            if operr is not None:
-                ec = self.space.getexecutioncontext()
-                next_instr = self.handle_operation_error(ec, operr)
-                self.last_instr = intmask(next_instr - 1)
-            else:
-                # Execution starts just after the last_instr.  Initially,
-                # last_instr is -1.  After a generator suspends it points to
-                # the YIELD_VALUE instruction.
-                next_instr = r_uint(self.last_instr + 1)
-                if next_instr != 0:
-                    self.pushvalue(w_inputvalue)
-            #
             try:
+                if operr is not None:
+                    ec = self.space.getexecutioncontext()
+                    next_instr = self.handle_operation_error(ec, operr)
+                    self.last_instr = intmask(next_instr - 1)
+                else:
+                    # Execution starts just after the last_instr.  Initially,
+                    # last_instr is -1.  After a generator suspends it points to
+                    # the YIELD_VALUE instruction.
+                    next_instr = r_uint(self.last_instr + 1)
+                    if next_instr != 0:
+                        self.pushvalue(w_inputvalue)
                 w_exitvalue = self.dispatch(self.pycode, next_instr,
                                             executioncontext)
-            except Exception:
-                executioncontext.return_trace(self, self.space.w_None)
-                raise
-            executioncontext.return_trace(self, w_exitvalue)
+            finally:
+                executioncontext.return_trace(self, w_exitvalue)
             # it used to say self.last_exception = None
             # this is now done by the code in pypyjit module
             # since we don't want to invalidate the virtualizable
diff --git a/pypy/interpreter/test/test_generator.py b/pypy/interpreter/test/test_generator.py
--- a/pypy/interpreter/test/test_generator.py
+++ b/pypy/interpreter/test/test_generator.py
@@ -57,12 +57,14 @@
         def f():
             yield 2
         g = f()
+        # two arguments version
         raises(NameError, g.throw, NameError, "Error")
 
     def test_throw2(self):
         def f():
             yield 2
         g = f()
+        # single argument version
         raises(NameError, g.throw, NameError("Error"))
 
     def test_throw3(self):
@@ -221,7 +223,8 @@
         def f():
             yield 1
         g = f()
-        raises(TypeError, g.send, 1)
+        raises(TypeError, g.send)     # one argument required
+        raises(TypeError, g.send, 1)  # not started, must send None
 
     def test_generator_explicit_stopiteration(self):
         def f():
diff --git a/pypy/interpreter/test/test_pyframe.py b/pypy/interpreter/test/test_pyframe.py
--- a/pypy/interpreter/test/test_pyframe.py
+++ b/pypy/interpreter/test/test_pyframe.py
@@ -562,3 +562,21 @@
         res = f(10).g()
         sys.settrace(None)
         assert res == 10
+
+    def test_throw_trace_bug(self):
+        import sys
+        def f():
+            yield 5
+        gen = f()
+        assert next(gen) == 5
+        seen = []
+        def trace_func(frame, event, *args):
+            seen.append(event)
+            return trace_func
+        sys.settrace(trace_func)
+        try:
+            gen.throw(ValueError)
+        except ValueError:
+            pass
+        sys.settrace(None)
+        assert seen == ['call', 'exception', 'return']
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -30,16 +30,19 @@
             raise oefmt(space.w_TypeError,
                         "array.array() does not take keyword arguments")
 
+    w_initializer_type = None
+    w_initializer = None
+    if len(__args__.arguments_w) > 0:
+        w_initializer = __args__.arguments_w[0]
+        w_initializer_type = space.type(w_initializer)
     for tc in unroll_typecodes:
         if typecode == tc:
             a = space.allocate_instance(types[tc].w_class, w_cls)
             a.__init__(space)
-
-            if len(__args__.arguments_w) > 0:
-                w_initializer = __args__.arguments_w[0]
-                if space.type(w_initializer) is space.w_str:
+            if w_initializer is not None:
+                if w_initializer_type is space.w_str:
                     a.descr_fromstring(space, w_initializer)
-                elif space.type(w_initializer) is space.w_list:
+                elif w_initializer_type is space.w_list:
                     a.descr_fromlist(space, w_initializer)
                 else:
                     a.extend(w_initializer, True)
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -6,7 +6,7 @@
 from pypy.interpreter.error import OperationError
 from pypy.module.cpyext.intobject import PyInt_AsUnsignedLongMask
 from rpython.rlib.rbigint import rbigint
-from rpython.rlib.rarithmetic import intmask
+from rpython.rlib.rarithmetic import widen
 
 
 PyLong_Check, PyLong_CheckExact = build_type_checkers("Long")
@@ -34,7 +34,7 @@
 def PyLong_FromLongLong(space, val):
     """Return a new PyLongObject object from a C long long, or NULL
     on failure."""
-    return space.wrap(val)
+    return space.newlong(val)
 
 @cpython_api([rffi.ULONG], PyObject)
 def PyLong_FromUnsignedLong(space, val):
@@ -203,7 +203,7 @@
     can be retrieved from the resulting value using PyLong_AsVoidPtr().
 
     If the integer is larger than LONG_MAX, a positive long integer is returned."""
-    return space.wrap(rffi.cast(ADDR, p))
+    return space.newlong(rffi.cast(ADDR, p))
 
 @cpython_api([PyObject], rffi.VOIDP, error=lltype.nullptr(rffi.VOIDP.TO))
 def PyLong_AsVoidPtr(space, w_long):
diff --git a/pypy/module/cpyext/pytraceback.py b/pypy/module/cpyext/pytraceback.py
--- a/pypy/module/cpyext/pytraceback.py
+++ b/pypy/module/cpyext/pytraceback.py
@@ -5,7 +5,6 @@
 from pypy.module.cpyext.pyobject import (
     PyObject, make_ref, from_ref, Py_DecRef, make_typedescr)
 from pypy.module.cpyext.frameobject import PyFrameObject
-from rpython.rlib.unroll import unrolling_iterable
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.pytraceback import PyTraceback
 from pypy.interpreter import pycode
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -8,18 +8,20 @@
 
 class TestLongObject(BaseApiTest):
     def test_FromLong(self, space, api):
-        value = api.PyLong_FromLong(3)
-        assert isinstance(value, W_LongObject)
-        assert space.unwrap(value) == 3
+        w_value = api.PyLong_FromLong(3)
+        assert isinstance(w_value, W_LongObject)
+        assert space.unwrap(w_value) == 3
 
-        value = api.PyLong_FromLong(sys.maxint)
-        assert isinstance(value, W_LongObject)
-        assert space.unwrap(value) == sys.maxint
+        w_value = api.PyLong_FromLong(sys.maxint)
+        assert isinstance(w_value, W_LongObject)
+        assert space.unwrap(w_value) == sys.maxint
 
     def test_aslong(self, space, api):
         w_value = api.PyLong_FromLong((sys.maxint - 1) / 2)
+        assert isinstance(w_value, W_LongObject)
 
         w_value = space.mul(w_value, space.wrap(2))
+        assert isinstance(w_value, W_LongObject)
         value = api.PyLong_AsLong(w_value)
         assert value == (sys.maxint - 1)
 
@@ -35,12 +37,16 @@
 
     def test_as_ssize_t(self, space, api):
         w_value = space.newlong(2)
+        assert isinstance(w_value, W_LongObject)
         value = api.PyLong_AsSsize_t(w_value)
         assert value == 2
-        assert space.eq_w(w_value, api.PyLong_FromSsize_t(2))
+        w_val2 = api.PyLong_FromSsize_t(2)
+        assert isinstance(w_val2, W_LongObject)
+        assert space.eq_w(w_value, w_val2)
 
     def test_fromdouble(self, space, api):
         w_value = api.PyLong_FromDouble(-12.74)
+        assert isinstance(w_value, W_LongObject)
         assert space.unwrap(w_value) == -12
         assert api.PyLong_AsDouble(w_value) == -12
 
@@ -103,6 +109,7 @@
 
     def test_as_voidptr(self, space, api):
         w_l = api.PyLong_FromVoidPtr(lltype.nullptr(rffi.VOIDP.TO))
+        assert isinstance(w_l, W_LongObject)
         assert space.unwrap(w_l) == 0L
         assert api.PyLong_AsVoidPtr(w_l) == lltype.nullptr(rffi.VOIDP.TO)
 
@@ -128,23 +135,58 @@
         module = self.import_extension('foo', [
             ("from_unsignedlong", "METH_NOARGS",
              """
-                 return PyLong_FromUnsignedLong((unsigned long)-1);
+                 PyObject * obj;
+                 obj = PyLong_FromUnsignedLong((unsigned long)-1);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """)])
         import sys
         assert module.from_unsignedlong() == 2 * sys.maxint + 1
 
     def test_fromlonglong(self):
         module = self.import_extension('foo', [
-            ("from_longlong", "METH_NOARGS",
+            ("from_longlong", "METH_VARARGS",
              """
-                 return PyLong_FromLongLong((long long)-1);
+                 int val;
+                 PyObject * obj;
+                 if (!PyArg_ParseTuple(args, "i", &val))
+                     return NULL;
+                 obj = PyLong_FromLongLong((long long)val);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """),
-            ("from_unsignedlonglong", "METH_NOARGS",
+            ("from_unsignedlonglong", "METH_VARARGS",
              """
-                 return PyLong_FromUnsignedLongLong((unsigned long long)-1);
+                 int val;
+                 PyObject * obj;
+                 if (!PyArg_ParseTuple(args, "i", &val))
+                     return NULL;
+                 obj = PyLong_FromUnsignedLongLong((long long)val);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """)])
-        assert module.from_longlong() == -1
-        assert module.from_unsignedlonglong() == (1<<64) - 1
+        assert module.from_longlong(-1) == -1
+        assert module.from_longlong(0) == 0
+        assert module.from_unsignedlonglong(0) == 0
+        assert module.from_unsignedlonglong(-1) == (1<<64) - 1
 
     def test_from_size_t(self):
         module = self.import_extension('foo', [
@@ -232,10 +274,15 @@
             ("has_sub", "METH_NOARGS",
              """
                 PyObject *ret, *obj = PyLong_FromLong(42);
-                if (obj->ob_type->tp_as_number->nb_subtract)
-                    ret = obj->ob_type->tp_as_number->nb_subtract(obj, obj);
+                if (obj->ob_type != &PyLong_Type)
+                    ret = PyLong_FromLong(-2);
                 else
-                    ret = PyLong_FromLong(-1);
+                {
+                    if (obj->ob_type->tp_as_number->nb_subtract)
+                        ret = obj->ob_type->tp_as_number->nb_subtract(obj, obj);
+                    else
+                        ret = PyLong_FromLong(-1);
+                }
                 Py_DECREF(obj);
                 return ret;
              """),
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -253,9 +253,28 @@
     from rpython.rtyper.lltypesystem import lltype, rffi
     return space.wrap(rffi.cast(lltype.Signed, handle))
 
+getsizeof_missing = """sys.getsizeof() is not implemented on PyPy.
+
+A memory profiler using this function is most likely to give results
+inconsistent with reality on PyPy.  It would be possible to have
+sys.getsizeof() return a number (with enough work), but that may or
+may not represent how much memory the object uses.  It doesn't even
+make really sense to ask how much *one* object uses, in isolation
+with the rest of the system.  For example, instances have maps,
+which are often shared across many instances; in this case the maps
+would probably be ignored by an implementation of sys.getsizeof(),
+but their overhead is important in some cases if they are many
+instances with unique maps.  Conversely, equal strings may share
+their internal string data even if they are different objects---or
+empty containers may share parts of their internals as long as they
+are empty.  Even stranger, some lists create objects as you read
+them; if you try to estimate the size in memory of range(10**6) as
+the sum of all items' size, that operation will by itself create one
+million integer objects that never existed in the first place.
+"""
+
 def getsizeof(space, w_object, w_default=None):
-    """Not implemented on PyPy."""
     if w_default is None:
-        raise oefmt(space.w_TypeError,
-                    "sys.getsizeof() not implemented on PyPy")
+        raise oefmt(space.w_TypeError, getsizeof_missing)
     return w_default
+getsizeof.__doc__ = getsizeof_missing
diff --git a/rpython/translator/backendopt/merge_if_blocks.py b/rpython/translator/backendopt/merge_if_blocks.py
--- a/rpython/translator/backendopt/merge_if_blocks.py
+++ b/rpython/translator/backendopt/merge_if_blocks.py
@@ -20,6 +20,14 @@
         return False
     if isinstance(op.args[0], Constant) and isinstance(op.args[1], Constant):
         return False
+    # check that the constant is hashable (ie not a symbolic)
+    try:
+        if isinstance(op.args[0], Constant):
+            hash(op.args[0].value)
+        else:
+            hash(op.args[1].value)
+    except TypeError:
+        return False
     return True
 
 def merge_chain(chain, checkvar, varmap, graph):
diff --git a/rpython/translator/backendopt/test/test_merge_if_blocks.py b/rpython/translator/backendopt/test/test_merge_if_blocks.py
--- a/rpython/translator/backendopt/test/test_merge_if_blocks.py
+++ b/rpython/translator/backendopt/test/test_merge_if_blocks.py
@@ -2,11 +2,12 @@
 from rpython.translator.backendopt.merge_if_blocks import merge_if_blocks
 from rpython.translator.backendopt.all import backend_optimizations
 from rpython.translator.translator import TranslationContext, graphof as tgraphof
-from rpython.flowspace.model import Block
+from rpython.flowspace.model import Block, checkgraph
 from rpython.translator.backendopt.removenoops import remove_same_as
 from rpython.rtyper.llinterp import LLInterpreter
 from rpython.rlib.rarithmetic import r_uint, r_ulonglong, r_longlong, r_int
 from rpython.annotator.model import SomeChar, SomeUnicodeCodePoint
+from rpython.rlib.objectmodel import CDefinedIntSymbolic
 
 def do_test_merge(fn, testvalues):
     t = TranslationContext()
@@ -225,3 +226,29 @@
     malloc.remove_mallocs(t, t.graphs)
     from rpython.translator import simplify
     simplify.join_blocks(graph)
+
+def test_switch_on_symbolic():
+    symb1 = CDefinedIntSymbolic("1", 1)
+    symb2 = CDefinedIntSymbolic("2", 2)
+    symb3 = CDefinedIntSymbolic("3", 3)
+    def fn(x):
+        res = 0
+        if x == symb1:
+            res += x + 1
+        elif x == symb2:
+            res += x + 2
+        elif x == symb3:
+            res += x + 3
+        res += 1
+        return res
+    t = TranslationContext()
+    a = t.buildannotator()
+    a.build_types(fn, [int])
+    rtyper = t.buildrtyper()
+    rtyper.specialize()
+    graph = t.graphs[0]
+    remove_same_as(graph)
+    res = merge_if_blocks_once(graph)
+    assert not res
+    checkgraph(graph)
+