[pypy-commit] pypy cpyext-ext: merge default into cpyext-ext

Fri Jun 3 06:50:44 EDT 2016

Author: Matti Picus <matti.picus at gmail.com>
Branch: cpyext-ext
Changeset: r84893:f6f66900d0d9
Date: 2016-06-03 13:49 +0300
http://bitbucket.org/pypy/pypy/changeset/f6f66900d0d9/

Log:	merge default into cpyext-ext

diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -125,4 +125,9 @@
 
 .. branch: traceviewer-common-merge-point-formats
 
-Teach RPython JIT's off-line traceviewer the most common ``debug_merge_point`` formats.
\ No newline at end of file
+Teach RPython JIT's off-line traceviewer the most common ``debug_merge_point`` formats.
+
+.. branch: cpyext-pickle
+
+Enable pickling of W_PyCFunctionObject by monkeypatching pickle.Pickler.dispatch
+at cpyext import time.
diff --git a/pypy/interpreter/astcompiler/codegen.py b/pypy/interpreter/astcompiler/codegen.py
--- a/pypy/interpreter/astcompiler/codegen.py
+++ b/pypy/interpreter/astcompiler/codegen.py
@@ -564,7 +564,6 @@
             self.emit_jump(ops.JUMP_FORWARD, end)
             self.use_next_block(next_except)
         self.emit_op(ops.END_FINALLY)   # this END_FINALLY will always re-raise
-        self.is_dead_code()
         self.use_next_block(otherwise)
         self.visit_sequence(te.orelse)
         self.use_next_block(end)
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -1784,3 +1784,9 @@
         assert ffi.list_types() == (['CFFIb', 'CFFIbb', 'CFFIbbb'],
                                     ['CFFIa', 'CFFIcc', 'CFFIccc'],
                                     ['CFFIaa', 'CFFIaaa', 'CFFIg'])
+
+    def test_FFIFunctionWrapper(self):
+        ffi, lib = self.prepare("void f(void);", "test_FFIFunctionWrapper",
+                                "void f(void) { }")
+        assert lib.f.__get__(42) is lib.f
+        assert lib.f.__get__(42, int) is lib.f
diff --git a/pypy/module/_cffi_backend/wrapper.py b/pypy/module/_cffi_backend/wrapper.py
--- a/pypy/module/_cffi_backend/wrapper.py
+++ b/pypy/module/_cffi_backend/wrapper.py
@@ -100,6 +100,11 @@
         doc = '%s;\n\nCFFI C function from %s.lib' % (doc, self.modulename)
         return space.wrap(doc)
 
+    def descr_get(self, space, w_obj, w_type=None):
+        # never bind anything, but a __get__ is still present so that
+        # pydoc displays useful information (namely, the __repr__)
+        return self
+
 
 @jit.unroll_safe
 def prepare_args(space, rawfunctype, args_w, start_index):
@@ -136,5 +141,6 @@
         __name__ = interp_attrproperty('fnname', cls=W_FunctionWrapper),
         __module__ = interp_attrproperty('modulename', cls=W_FunctionWrapper),
         __doc__ = GetSetProperty(W_FunctionWrapper.descr_get_doc),
+        __get__ = interp2app(W_FunctionWrapper.descr_get),
         )
 W_FunctionWrapper.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -1,4 +1,5 @@
 from pypy.interpreter.mixedmodule import MixedModule
+from pypy.interpreter import gateway
 from pypy.module.cpyext.state import State
 from pypy.module.cpyext import api
 
@@ -14,6 +15,12 @@
 
     def startup(self, space):
         space.fromcache(State).startup(space)
+        method = pypy.module.cpyext.typeobject.get_new_method_def(space)
+        w_obj = pypy.module.cpyext.methodobject.W_PyCFunctionObject(space, method, space.wrap(''))
+        space.appexec([space.type(w_obj)], """(methodtype):
+            from pickle import Pickler 
+            Pickler.dispatch[methodtype] = Pickler.save_global
+        """)
 
     def register_atexit(self, function):
         if len(self.atexit_funcs) >= 32:
@@ -66,6 +73,7 @@
 import pypy.module.cpyext.pyfile
 import pypy.module.cpyext.pystrtod
 import pypy.module.cpyext.pytraceback
+import pypy.module.cpyext.methodobject
 
 # now that all rffi_platform.Struct types are registered, configure them
 api.configure_types()
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -44,8 +44,8 @@
                    dealloc=cfunction_dealloc)
 
 def cfunction_attach(space, py_obj, w_obj):
+    assert isinstance(w_obj, W_PyCFunctionObject)
     py_func = rffi.cast(PyCFunctionObject, py_obj)
-    assert isinstance(w_obj, W_PyCFunctionObject)
     py_func.c_m_ml = w_obj.ml
     py_func.c_m_self = make_ref(space, w_obj.w_self)
     py_func.c_m_module = make_ref(space, w_obj.w_module)
diff --git a/pypy/module/cpyext/test/array.c b/pypy/module/cpyext/test/array.c
--- a/pypy/module/cpyext/test/array.c
+++ b/pypy/module/cpyext/test/array.c
@@ -1502,7 +1502,7 @@
 static PyObject *
 array_reduce(arrayobject *array)
 {
-    PyObject *dict, *result, *list;
+    PyObject *dict, *result, *list, *mod, *obj;
 
     dict = PyObject_GetAttrString((PyObject *)array, "__dict__");
     if (dict == NULL) {
@@ -1512,6 +1512,18 @@
         dict = Py_None;
         Py_INCREF(dict);
     }
+    /* Return a tuple of (callable object, typecode, values, state) */
+    mod = PyImport_ImportModule("array");
+    if (mod == NULL) {
+        Py_DECREF(dict);
+        return NULL;
+    }
+    obj = PyObject_GetAttrString(mod, "_reconstruct");
+    Py_DECREF(mod);
+    if (obj == NULL) {
+        Py_DECREF(dict);
+        return NULL;
+    }
     /* Unlike in Python 3.x, we never use the more efficient memory
      * representation of an array for pickling.  This is unfortunately
      * necessary to allow array objects to be unpickled by Python 3.x,
@@ -1524,7 +1536,7 @@
         return NULL;
     }
     result = Py_BuildValue(
-        "O(cO)O", Py_TYPE(array), array->ob_descr->typecode, list, dict);
+        "O(cO)O", obj, array->ob_descr->typecode, list, dict);
     Py_DECREF(list);
     Py_DECREF(dict);
     return result;
@@ -1916,6 +1928,11 @@
     char c;
     PyObject *initial = NULL, *it = NULL;
     struct arraydescr *descr;
+    if (type == NULL)
+    {
+        /* when called from _reconstruct */
+        type = &Arraytype;
+    }
 
     if (type == &Arraytype && !_PyArg_NoKeywords("array.array()", kwds))
         return NULL;
@@ -2017,6 +2034,11 @@
     return NULL;
 }
 
+static PyObject *
+_reconstruct(PyTypeObject *type, PyObject *args)
+{
+    return array_new(type, args, NULL);
+}
 
 PyDoc_STRVAR(module_doc,
 "This module defines an object type which can efficiently represent\n\
@@ -2223,6 +2245,7 @@
 
 /* No functions in array module. */
 static PyMethodDef a_methods[] = {
+    {"_reconstruct",   (PyCFunction)_reconstruct, METH_VARARGS, NULL},
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
@@ -2244,6 +2267,8 @@
         return;
 
     Py_INCREF((PyObject *)&Arraytype);
+    if (PyType_Ready(&Arraytype) < 0)
+        return;
     PyModule_AddObject(m, "ArrayType", (PyObject *)&Arraytype);
     Py_INCREF((PyObject *)&Arraytype);
     PyModule_AddObject(m, "array", (PyObject *)&Arraytype);
diff --git a/pypy/module/cpyext/test/test_arraymodule.py b/pypy/module/cpyext/test/test_arraymodule.py
--- a/pypy/module/cpyext/test/test_arraymodule.py
+++ b/pypy/module/cpyext/test/test_arraymodule.py
@@ -67,3 +67,13 @@
                                 '\x02\0\0\0'
                                 '\x03\0\0\0'
                                 '\x04\0\0\0')
+
+    def test_pickle(self):
+        import pickle
+        module = self.import_module(name='array')
+        arr = module.array('i', [1,2,3,4])
+        s = pickle.dumps(arr)
+        # pypy exports __dict__ on cpyext objects, so the pickle picks up the {} state value
+        #assert s == "carray\n_reconstruct\np0\n(S'i'\np1\n(lp2\nI1\naI2\naI3\naI4\natp3\nRp4\n."
+        rra = pickle.loads(s) # rra is arr backwards
+        #assert arr.tolist() == rra.tolist()
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -176,9 +176,8 @@
         hexstring = hexstring.lower()
         data = []
         length = len(hexstring)
-        i = -2
+        i = 0
         while True:
-            i += 2
             while i < length and hexstring[i] == ' ':
                 i += 1
             if i >= length:
@@ -193,6 +192,7 @@
             if bot == -1:
                 raise oefmt(space.w_ValueError, NON_HEX_MSG, i + 1)
             data.append(chr(top*16 + bot))
+            i += 2
 
         # in CPython bytearray.fromhex is a staticmethod, so
         # we ignore w_type and always return a bytearray
diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py
--- a/pypy/objspace/std/objectobject.py
+++ b/pypy/objspace/std/objectobject.py
@@ -180,7 +180,13 @@
     if w_reduce is not None:
         w_cls = space.getattr(w_obj, space.wrap('__class__'))
         w_cls_reduce_meth = space.getattr(w_cls, w_st_reduce)
-        w_cls_reduce = space.getattr(w_cls_reduce_meth, space.wrap('im_func'))
+        try:
+            w_cls_reduce = space.getattr(w_cls_reduce_meth, space.wrap('im_func'))
+        except OperationError as e:
+            # i.e. PyCFunction from cpyext
+            if not e.match(space, space.w_AttributeError):
+                raise
+            w_cls_reduce = space.w_None
         w_objtype = space.w_object
         w_obj_dict = space.getattr(w_objtype, space.wrap('__dict__'))
         w_obj_reduce = space.getitem(w_obj_dict, w_st_reduce)
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -467,7 +467,11 @@
             assert saveerrloc.is_imm()
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
         return fcond
 
     def _genop_same_as(self, op, arglocs, regalloc, fcond):
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -397,9 +397,9 @@
         else:
             self.rm.force_spill_var(var)
 
-    def before_call(self, force_store=[], save_all_regs=False):
-        self.rm.before_call(force_store, save_all_regs)
-        self.vfprm.before_call(force_store, save_all_regs)
+    def before_call(self, save_all_regs=False):
+        self.rm.before_call(save_all_regs)
+        self.vfprm.before_call(save_all_regs)
 
     def _sync_var(self, v):
         if v.type == FLOAT:
@@ -552,8 +552,7 @@
     prepare_op_call_f = _prepare_op_call
     prepare_op_call_n = _prepare_op_call
 
-    def _prepare_call(self, op, force_store=[], save_all_regs=False,
-                      first_arg_index=1):
+    def _prepare_call(self, op, save_all_regs=False, first_arg_index=1):
         args = [None] * (op.numargs() + 3)
         calldescr = op.getdescr()
         assert isinstance(calldescr, CallDescr)
@@ -571,17 +570,27 @@
         args[1] = imm(size)
         args[2] = sign_loc
 
-        args[0] = self._call(op, args, force_store, save_all_regs)
+        effectinfo = calldescr.get_extra_info()
+        if save_all_regs:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+
+        args[0] = self._call(op, args, gc_level)
         return args
 
-    def _call(self, op, arglocs, force_store=[], save_all_regs=False):
-        # spill variables that need to be saved around calls
-        self.vfprm.before_call(force_store, save_all_regs=save_all_regs)
-        if not save_all_regs:
-            gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-            if gcrootmap and gcrootmap.is_shadow_stack:
-                save_all_regs = 2
-        self.rm.before_call(force_store, save_all_regs=save_all_regs)
+    def _call(self, op, arglocs, gc_level):
+        # spill variables that need to be saved around calls:
+        # gc_level == 0: callee cannot invoke the GC
+        # gc_level == 1: can invoke GC, save all regs that contain pointers
+        # gc_level == 2: can force, save all regs
+        save_all_regs = gc_level == 2
+        self.vfprm.before_call(save_all_regs=save_all_regs)
+        if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+            save_all_regs = 2
+        self.rm.before_call(save_all_regs=save_all_regs)
         resloc = self.after_call(op)
         return resloc
 
@@ -1068,7 +1077,7 @@
     def _prepare_op_call_assembler(self, op, fcond):
         locs = self.locs_for_call_assembler(op)
         tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
-        resloc = self._call(op, locs + [tmploc], save_all_regs=True)
+        resloc = self._call(op, locs + [tmploc], gc_level=2)
         return locs + [resloc, tmploc]
 
     prepare_op_call_assembler_i = _prepare_op_call_assembler
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -574,27 +574,113 @@
             self.assembler.regalloc_mov(reg, to)
         # otherwise it's clean
 
+    def _bc_spill(self, v, new_free_regs):
+        self._sync_var(v)
+        new_free_regs.append(self.reg_bindings.pop(v))
+
     def before_call(self, force_store=[], save_all_regs=0):
-        """ Spill registers before a call, as described by
-        'self.save_around_call_regs'.  Registers are not spilled if
-        they don't survive past the current operation, unless they
-        are listed in 'force_store'.  'save_all_regs' can be 0 (default),
-        1 (save all), or 2 (save default+PTRs).
+        """Spill or move some registers before a call.  By default,
+        this means: for every register in 'self.save_around_call_regs',
+        if there is a variable there and it survives longer than
+        the current operation, then it is spilled/moved somewhere else.
+
+        'save_all_regs' can be 0 (default set of registers), 1 (do that
+        for all registers), or 2 (default + gc ptrs).
+
+        Overview of what we do (the implementation does it differently,
+        for the same result):
+
+        * we first check the set of registers that are free: call it F.
+
+        * possibly_free_vars() is implied for all variables (except
+          the ones listed in force_store): if they don't survive past
+          the current operation, they are forgotten now.  (Their
+          registers remain outside F, because they are typically
+          arguments to the call, so they should not be overwritten by
+          the next step.)
+
+        * then for every variable that needs to be spilled/moved: if
+          there is an entry in F that is acceptable, pick it and emit a
+          move.  Otherwise, emit a spill.  Start doing this with the
+          variables that survive the shortest time, to give them a
+          better chance to remain in a register---similar algo as
+          _pick_variable_to_spill().
+
+        Note: when a register is moved, it often (but not always) means
+        we could have been more clever and picked a better register in
+        the first place, when we did so earlier.  It is done this way
+        anyway, as a local hack in this function, because on x86 CPUs
+        such register-register moves are almost free.
         """
+        new_free_regs = []
+        move_or_spill = []
+
         for v, reg in self.reg_bindings.items():
-            if v not in force_store and self.longevity[v][1] <= self.position:
+            max_age = self.longevity[v][1]
+            if v not in force_store and max_age <= self.position:
                 # variable dies
                 del self.reg_bindings[v]
-                self.free_regs.append(reg)
+                new_free_regs.append(reg)
                 continue
-            if save_all_regs != 1 and reg not in self.save_around_call_regs:
-                if save_all_regs == 0:
-                    continue    # we don't have to
-                if v.type != REF:
-                    continue    # only save GC pointers
-            self._sync_var(v)
-            del self.reg_bindings[v]
-            self.free_regs.append(reg)
+
+            if save_all_regs == 1:
+                # we need to spill all registers in this mode
+                self._bc_spill(v, new_free_regs)
+                #
+            elif save_all_regs == 2 and v.type == REF:
+                # we need to spill all GC ptrs in this mode
+                self._bc_spill(v, new_free_regs)
+                #
+            elif reg not in self.save_around_call_regs:
+                continue  # in a register like ebx/rbx: it is fine where it is
+                #
+            else:
+                # this is a register like eax/rax, which needs either
+                # spilling or moving.
+                move_or_spill.append((v, max_age))
+
+        if len(move_or_spill) > 0:
+            while len(self.free_regs) > 0:
+                new_reg = self.free_regs.pop()
+                if new_reg in self.save_around_call_regs:
+                    new_free_regs.append(new_reg)    # not this register...
+                    continue
+                # This 'new_reg' is suitable for moving a candidate to.
+                # Pick the one with the smallest max_age.  (This
+                # is one step of a naive sorting algo, slow in theory,
+                # but the list should always be very small so it
+                # doesn't matter.)
+                best_i = 0
+                smallest_max_age = move_or_spill[0][1]
+                for i in range(1, len(move_or_spill)):
+                    max_age = move_or_spill[i][1]
+                    if max_age < smallest_max_age:
+                        best_i = i
+                        smallest_max_age = max_age
+                v, max_age = move_or_spill.pop(best_i)
+                # move from 'reg' to 'new_reg'
+                reg = self.reg_bindings[v]
+                if not we_are_translated():
+                    if move_or_spill:
+                        assert max_age <= min([_a for _, _a in move_or_spill])
+                    assert reg in self.save_around_call_regs
+                    assert new_reg not in self.save_around_call_regs
+                self.assembler.regalloc_mov(reg, new_reg)
+                self.reg_bindings[v] = new_reg    # change the binding
+                new_free_regs.append(reg)
+                #
+                if len(move_or_spill) == 0:
+                    break
+            else:
+                # no more free registers to move to, spill the rest
+                for v, max_age in move_or_spill:
+                    self._bc_spill(v, new_free_regs)
+
+        # re-add registers in 'new_free_regs', but in reverse order,
+        # so that the last ones (added just above, from
+        # save_around_call_regs) are picked last by future '.pop()'
+        while len(new_free_regs) > 0:
+            self.free_regs.append(new_free_regs.pop())
 
     def after_call(self, v):
         """ Adjust registers according to the result of the call,
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -603,7 +603,11 @@
             assert saveerrloc.is_imm()
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
 
     def _genop_call(self, op, arglocs, regalloc):
         oopspecindex = regalloc.get_oopspecindex(op)
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -1,6 +1,7 @@
 from rpython.jit.backend.llsupport.regalloc import (RegisterManager, FrameManager,
                                                     TempVar, compute_vars_longevity,
                                                     BaseRegalloc)
+from rpython.jit.backend.llsupport.descr import CallDescr
 from rpython.jit.backend.ppc.arch import (WORD, MY_COPY_OF_REGS, IS_PPC_32)
 from rpython.jit.codewriter import longlong
 from rpython.jit.backend.ppc.jump import (remap_frame_layout,
@@ -369,9 +370,9 @@
         # This operation is used only for testing
         self.force_spill_var(op.getarg(0))
 
-    def before_call(self, force_store=[], save_all_regs=False):
-        self.rm.before_call(force_store, save_all_regs)
-        self.fprm.before_call(force_store, save_all_regs)
+    def before_call(self, save_all_regs=False):
+        self.rm.before_call(save_all_regs)
+        self.fprm.before_call(save_all_regs)
 
     def after_call(self, v):
         if v.type == FLOAT:
@@ -756,7 +757,7 @@
         src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
         dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
         length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        self._spill_before_call(save_all_regs=False)
+        self._spill_before_call(gc_level=0)
         return [src_ptr_loc, dst_ptr_loc,
                 src_ofs_loc, dst_ofs_loc, length_loc]
 
@@ -789,13 +790,15 @@
     prepare_call_f = _prepare_call
     prepare_call_n = _prepare_call
 
-    def _spill_before_call(self, save_all_regs=False):
-        # spill variables that need to be saved around calls
+    def _spill_before_call(self, gc_level):
+        # spill variables that need to be saved around calls:
+        # gc_level == 0: callee cannot invoke the GC
+        # gc_level == 1: can invoke GC, save all regs that contain pointers
+        # gc_level == 2: can force, save all regs
+        save_all_regs = gc_level == 2
         self.fprm.before_call(save_all_regs=save_all_regs)
-        if not save_all_regs:
-            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
-            if gcrootmap and gcrootmap.is_shadow_stack:
-                save_all_regs = 2
+        if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+            save_all_regs = 2
         self.rm.before_call(save_all_regs=save_all_regs)
 
     def _prepare_call(self, op, save_all_regs=False):
@@ -803,7 +806,18 @@
         args.append(None)
         for i in range(op.numargs()):
             args.append(self.loc(op.getarg(i)))
-        self._spill_before_call(save_all_regs)
+
+        calldescr = op.getdescr()
+        assert isinstance(calldescr, CallDescr)
+        effectinfo = calldescr.get_extra_info()
+        if save_all_regs:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+        self._spill_before_call(gc_level=gc_level)
+
         if op.type != VOID:
             resloc = self.after_call(op)
             args[0] = resloc
@@ -932,7 +946,7 @@
 
     def _prepare_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
-        self._spill_before_call(save_all_regs=True)
+        self._spill_before_call(gc_level=2)
         if op.type != VOID:
             resloc = self.after_call(op)
         else:
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -2112,7 +2112,11 @@
             assert isinstance(saveerrloc, ImmedLoc)
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
 
     def _store_force_index(self, guard_op):
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -795,22 +795,22 @@
         else:
             self._consider_call(op)
 
-    def _call(self, op, arglocs, force_store=[], guard_not_forced=False):
+    def _call(self, op, arglocs, gc_level):
         # we need to save registers on the stack:
         #
         #  - at least the non-callee-saved registers
         #
-        #  - we assume that any call can collect, and we
-        #    save also the callee-saved registers that contain GC pointers
+        #  - if gc_level > 0, we save also the callee-saved registers that
+        #    contain GC pointers
         #
-        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
-        #    anyway, in case we need to do cpu.force().  The issue is that
-        #    grab_frame_values() would not be able to locate values in
-        #    callee-saved registers.
+        #  - gc_level == 2 for CALL_MAY_FORCE or CALL_ASSEMBLER.  We
+        #    have to save all regs anyway, in case we need to do
+        #    cpu.force().  The issue is that grab_frame_values() would
+        #    not be able to locate values in callee-saved registers.
         #
-        save_all_regs = guard_not_forced
-        self.xrm.before_call(force_store, save_all_regs=save_all_regs)
-        if not save_all_regs:
+        save_all_regs = gc_level == 2
+        self.xrm.before_call(save_all_regs=save_all_regs)
+        if gc_level == 1:
             gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
             # we save all the registers for shadowstack and asmgcc for now
             # --- for asmgcc too: we can't say "register x is a gc ref"
@@ -818,7 +818,7 @@
             # more for now.
             if gcrootmap: # and gcrootmap.is_shadow_stack:
                 save_all_regs = 2
-        self.rm.before_call(force_store, save_all_regs=save_all_regs)
+        self.rm.before_call(save_all_regs=save_all_regs)
         if op.type != 'v':
             if op.type == FLOAT:
                 resloc = self.xrm.after_call(op)
@@ -838,9 +838,18 @@
             sign_loc = imm1
         else:
             sign_loc = imm0
+        #
+        effectinfo = calldescr.get_extra_info()
+        if guard_not_forced:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+        #
         self._call(op, [imm(size), sign_loc] +
                        [self.loc(op.getarg(i)) for i in range(op.numargs())],
-                   guard_not_forced=guard_not_forced)
+                   gc_level=gc_level)
 
     def _consider_real_call(self, op):
         effectinfo = op.getdescr().get_extra_info()
@@ -899,7 +908,7 @@
 
     def _consider_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
-        self._call(op, locs, guard_not_forced=True)
+        self._call(op, locs, gc_level=2)
     consider_call_assembler_i = _consider_call_assembler
     consider_call_assembler_r = _consider_call_assembler
     consider_call_assembler_f = _consider_call_assembler
diff --git a/rpython/jit/codewriter/call.py b/rpython/jit/codewriter/call.py
--- a/rpython/jit/codewriter/call.py
+++ b/rpython/jit/codewriter/call.py
@@ -14,6 +14,7 @@
 from rpython.translator.backendopt.canraise import RaiseAnalyzer
 from rpython.translator.backendopt.writeanalyze import ReadWriteAnalyzer
 from rpython.translator.backendopt.graphanalyze import DependencyTracker
+from rpython.translator.backendopt.collectanalyze import CollectAnalyzer
 
 
 class CallControl(object):
@@ -37,9 +38,9 @@
             self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
             self.quasiimmut_analyzer = QuasiImmutAnalyzer(translator)
             self.randomeffects_analyzer = RandomEffectsAnalyzer(translator)
-            self.seen = DependencyTracker(self.readwrite_analyzer)
-        else:
-            self.seen = None
+            self.collect_analyzer = CollectAnalyzer(translator)
+            self.seen_rw = DependencyTracker(self.readwrite_analyzer)
+            self.seen_gc = DependencyTracker(self.collect_analyzer)
         #
         for index, jd in enumerate(jitdrivers_sd):
             jd.index = index
@@ -294,9 +295,9 @@
                     "but the function has no result" % (op, ))
         #
         effectinfo = effectinfo_from_writeanalyze(
-            self.readwrite_analyzer.analyze(op, self.seen), self.cpu,
+            self.readwrite_analyzer.analyze(op, self.seen_rw), self.cpu,
             extraeffect, oopspecindex, can_invalidate, call_release_gil_target,
-            extradescr,
+            extradescr, self.collect_analyzer.analyze(op, self.seen_gc),
         )
         #
         assert effectinfo is not None
diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py
--- a/rpython/jit/codewriter/effectinfo.py
+++ b/rpython/jit/codewriter/effectinfo.py
@@ -116,7 +116,8 @@
                 oopspecindex=OS_NONE,
                 can_invalidate=False,
                 call_release_gil_target=_NO_CALL_RELEASE_GIL_TARGET,
-                extradescrs=None):
+                extradescrs=None,
+                can_collect=True):
         readonly_descrs_fields = frozenset_or_none(readonly_descrs_fields)
         readonly_descrs_arrays = frozenset_or_none(readonly_descrs_arrays)
         readonly_descrs_interiorfields = frozenset_or_none(
@@ -133,7 +134,8 @@
                write_descrs_interiorfields,
                extraeffect,
                oopspecindex,
-               can_invalidate)
+               can_invalidate,
+               can_collect)
         tgt_func, tgt_saveerr = call_release_gil_target
         if tgt_func:
             key += (object(),)    # don't care about caching in this case
@@ -184,6 +186,7 @@
         #
         result.extraeffect = extraeffect
         result.can_invalidate = can_invalidate
+        result.can_collect = can_collect
         result.oopspecindex = oopspecindex
         result.extradescrs = extradescrs
         result.call_release_gil_target = call_release_gil_target
@@ -230,6 +233,9 @@
     def check_can_invalidate(self):
         return self.can_invalidate
 
+    def check_can_collect(self):
+        return self.can_collect
+
     def check_is_elidable(self):
         return (self.extraeffect == self.EF_ELIDABLE_CAN_RAISE or
                 self.extraeffect == self.EF_ELIDABLE_OR_MEMORYERROR or
@@ -268,7 +274,8 @@
                                  can_invalidate=False,
                                  call_release_gil_target=
                                      EffectInfo._NO_CALL_RELEASE_GIL_TARGET,
-                                 extradescr=None):
+                                 extradescr=None,
+                                 can_collect=True):
     from rpython.translator.backendopt.writeanalyze import top_set
     if effects is top_set or extraeffect == EffectInfo.EF_RANDOM_EFFECTS:
         readonly_descrs_fields = None
@@ -343,6 +350,9 @@
             else:
                 assert 0
     #
+    if extraeffect >= EffectInfo.EF_FORCES_VIRTUAL_OR_VIRTUALIZABLE:
+        can_collect = True
+    #
     return EffectInfo(readonly_descrs_fields,
                       readonly_descrs_arrays,
                       readonly_descrs_interiorfields,
@@ -353,7 +363,8 @@
                       oopspecindex,
                       can_invalidate,
                       call_release_gil_target,
-                      extradescr)
+                      extradescr,
+                      can_collect)
 
 def consider_struct(TYPE, fieldname):
     if fieldType(TYPE, fieldname) is lltype.Void:
diff --git a/rpython/jit/codewriter/test/test_call.py b/rpython/jit/codewriter/test/test_call.py
--- a/rpython/jit/codewriter/test/test_call.py
+++ b/rpython/jit/codewriter/test/test_call.py
@@ -334,3 +334,37 @@
     assert call_op.opname == 'direct_call'
     with py.test.raises(Exception):
         call_descr = cc.getcalldescr(call_op)
+
+def test_can_or_cannot_collect():
+    from rpython.jit.backend.llgraph.runner import LLGraphCPU
+    prebuilts = [[5], [6]]
+    l = []    # NOTE(review): never used below
+    def f1(n):
+        if n > 1:
+            raise IndexError
+        return prebuilts[n]    # reads a prebuilt list: cannot collect
+    f1._dont_inline_ = True
+
+    def f2(n):
+        return [n]         # builds a new list: can collect
+    f2._dont_inline_ = True
+
+    def f(n):
+        a = f1(n)
+        b = f2(n)
+        return len(a) + len(b)
+
+    rtyper = support.annotate(f, [1])
+    jitdriver_sd = FakeJitDriverSD(rtyper.annotator.translator.graphs[0])
+    cc = CallControl(LLGraphCPU(rtyper), jitdrivers_sd=[jitdriver_sd])
+    res = cc.find_all_graphs(FakePolicy())
+    [f_graph] = [x for x in res if x.func is f]
+    for index, expected in [    # (operation index in f, can collect?)
+            (0, False),    # f1()
+            (1, True),     # f2()
+            (2, False),    # len(a)
+            (3, False)]:   # len(b)
+        call_op = f_graph.startblock.operations[index]
+        assert call_op.opname == 'direct_call'
+        call_descr = cc.getcalldescr(call_op)
+        assert call_descr.extrainfo.check_can_collect() == expected
diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py
--- a/rpython/memory/gc/incminimark.py
+++ b/rpython/memory/gc/incminimark.py
@@ -1358,11 +1358,14 @@
         return cls.minimal_size_in_nursery
 
     def write_barrier(self, addr_struct):
-        if self.header(addr_struct).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        # see OP_GC_BIT in translator/c/gc.py
+        if llop.gc_bit(lltype.Signed, self.header(addr_struct),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             self.remember_young_pointer(addr_struct)
 
     def write_barrier_from_array(self, addr_array, index):
-        if self.header(addr_array).tid & GCFLAG_TRACK_YOUNG_PTRS:
+        if llop.gc_bit(lltype.Signed, self.header(addr_array),
+                       GCFLAG_TRACK_YOUNG_PTRS):
             if self.card_page_indices > 0:
                 self.remember_young_pointer_from_array2(addr_array, index)
             else:
diff --git a/rpython/memory/gctransform/framework.py b/rpython/memory/gctransform/framework.py
--- a/rpython/memory/gctransform/framework.py
+++ b/rpython/memory/gctransform/framework.py
@@ -5,7 +5,7 @@
 from rpython.rlib.unroll import unrolling_iterable
 from rpython.rtyper import rmodel, annlowlevel
 from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, llgroup
-from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS, llop
+from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.memory import gctypelayout
 from rpython.memory.gctransform.log import log
 from rpython.memory.gctransform.support import get_rtti, ll_call_destructor
@@ -14,7 +14,7 @@
 from rpython.memory.gctypelayout import ll_weakref_deref, WEAKREF, WEAKREFPTR
 from rpython.memory.gctypelayout import FIN_TRIGGER_FUNC, FIN_HANDLER_ARRAY
 from rpython.tool.sourcetools import func_with_new_name
-from rpython.translator.backendopt import graphanalyze
+from rpython.translator.backendopt.collectanalyze import CollectAnalyzer
 from rpython.translator.backendopt.finalizer import FinalizerAnalyzer
 from rpython.translator.backendopt.support import var_needsgc
 import types
@@ -23,33 +23,6 @@
 TYPE_ID = llgroup.HALFWORD
 
 
-class CollectAnalyzer(graphanalyze.BoolGraphAnalyzer):
-
-    def analyze_direct_call(self, graph, seen=None):
-        try:
-            func = graph.func
-        except AttributeError:
-            pass
-        else:
-            if getattr(func, '_gctransformer_hint_cannot_collect_', False):
-                return False
-            if getattr(func, '_gctransformer_hint_close_stack_', False):
-                return True
-        return graphanalyze.BoolGraphAnalyzer.analyze_direct_call(self, graph,
-                                                                  seen)
-    def analyze_external_call(self, funcobj, seen=None):
-        if funcobj.random_effects_on_gcobjs:
-            return True
-        return graphanalyze.BoolGraphAnalyzer.analyze_external_call(
-            self, funcobj, seen)
-    def analyze_simple_operation(self, op, graphinfo):
-        if op.opname in ('malloc', 'malloc_varsize'):
-            flags = op.args[1].value
-            return flags['flavor'] == 'gc'
-        else:
-            return (op.opname in LL_OPERATIONS and
-                    LL_OPERATIONS[op.opname].canmallocgc)
-
 def propagate_no_write_barrier_needed(result, block, mallocvars,
                                       collect_analyzer, entrymap,
                                       startindex=0):
diff --git a/rpython/rlib/rarithmetic.py b/rpython/rlib/rarithmetic.py
--- a/rpython/rlib/rarithmetic.py
+++ b/rpython/rlib/rarithmetic.py
@@ -213,6 +213,8 @@
         return self_type
     if self_type in (bool, int, long):
         return other_type
+    if self_type is float or other_type is float:
+        return float
     if self_type.SIGNED == other_type.SIGNED:
         return build_int(None, self_type.SIGNED, max(self_type.BITS, other_type.BITS))
     raise AssertionError("Merging these types (%s, %s) is not supported" % (self_type, other_type))
@@ -297,6 +299,7 @@
     def _widen(self, other, value):
         """
         if one argument is int or long, the other type wins.
+        if one argument is float, the result is float.
         otherwise, produce the largest class to hold the result.
         """
         self_type = type(self)
diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py
--- a/rpython/rlib/test/test_rarithmetic.py
+++ b/rpython/rlib/test/test_rarithmetic.py
@@ -18,11 +18,11 @@
 
 class Test_r_int:
     def test__add__(self):
-        self.binary_test(lambda x, y: x + y)
+        self.binary_test(lambda x, y: x + y, includes_floats=True)
     def test__sub__(self):
-        self.binary_test(lambda x, y: x - y)
+        self.binary_test(lambda x, y: x - y, includes_floats=True)
     def test__mul__(self):
-        self.binary_test(lambda x, y: x * y)
+        self.binary_test(lambda x, y: x * y, includes_floats=True)
         x = 3; y = [2]
         assert x*y == r_int(x)*y
         assert y*x == y*r_int(x)
@@ -58,12 +58,15 @@
             cmp = f(r_int(arg))
             assert res == cmp
 
-    def binary_test(self, f, rargs = None):
+    def binary_test(self, f, rargs=None, includes_floats=False):
         if not rargs:
             rargs = (-10, -1, 3, 55)
+        types_list = [(int, r_int), (r_int, int), (r_int, r_int)]
+        if includes_floats:
+            types_list += [(float, r_int), (r_int, float)]
         for larg in (-10, -1, 0, 3, 1234):
             for rarg in rargs:
-                for types in ((int, r_int), (r_int, int), (r_int, r_int)):
+                for types in types_list:
                     res = f(larg, rarg)
                     left, right = types
                     cmp = f(left(larg), right(rarg))
diff --git a/rpython/rtyper/lltypesystem/lloperation.py b/rpython/rtyper/lltypesystem/lloperation.py
--- a/rpython/rtyper/lltypesystem/lloperation.py
+++ b/rpython/rtyper/lltypesystem/lloperation.py
@@ -470,6 +470,7 @@
     'gc_pin'              : LLOp(canrun=True),
     'gc_unpin'            : LLOp(canrun=True),
     'gc__is_pinned'        : LLOp(canrun=True),
+    'gc_bit'              : LLOp(sideeffects=False, canrun=True),
 
     'gc_get_rpy_roots'    : LLOp(),
     'gc_get_rpy_referents': LLOp(),
diff --git a/rpython/rtyper/lltypesystem/opimpl.py b/rpython/rtyper/lltypesystem/opimpl.py
--- a/rpython/rtyper/lltypesystem/opimpl.py
+++ b/rpython/rtyper/lltypesystem/opimpl.py
@@ -1,3 +1,4 @@
+import random, sys
 from rpython.flowspace.operation import op
 from rpython.rlib import debug
 from rpython.rlib.rarithmetic import is_valid_int
@@ -680,6 +681,11 @@
 def op_gc_writebarrier(addr):
     pass
 
+def op_gc_bit(hdr, bitmask):
+    if hdr.tid & bitmask:
+        return random.randrange(1, sys.maxint)  # arbitrary non-zero result
+    return 0    # bit not set in hdr.tid
+
 def op_shrink_array(array, smallersize):
     return False
 
diff --git a/rpython/translator/backendopt/collectanalyze.py b/rpython/translator/backendopt/collectanalyze.py
new file mode 100644
--- /dev/null
+++ b/rpython/translator/backendopt/collectanalyze.py
@@ -0,0 +1,33 @@
+from rpython.translator.backendopt import graphanalyze
+from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS
+
+# NB. tests are in rpython/memory/gctransform/test/test_framework.py
+
+
+class CollectAnalyzer(graphanalyze.BoolGraphAnalyzer):
+
+    def analyze_direct_call(self, graph, seen=None):
+        try:
+            func = graph.func
+        except AttributeError:
+            pass    # graph has no .func attribute: use the generic analysis
+        else:
+            if getattr(func, '_gctransformer_hint_cannot_collect_', False):
+                return False    # explicit hint overrides the analysis
+            if getattr(func, '_gctransformer_hint_close_stack_', False):
+                return True     # explicit hint overrides the analysis
+        return graphanalyze.BoolGraphAnalyzer.analyze_direct_call(self, graph,
+                                                                  seen)
+    def analyze_external_call(self, funcobj, seen=None):
+        if funcobj.random_effects_on_gcobjs:
+            return True    # random effects on GC objects: answer True
+        return graphanalyze.BoolGraphAnalyzer.analyze_external_call(
+            self, funcobj, seen)
+
+    def analyze_simple_operation(self, op, graphinfo):
+        if op.opname in ('malloc', 'malloc_varsize'):
+            flags = op.args[1].value
+            return flags['flavor'] == 'gc'    # only 'gc'-flavor mallocs count
+        else:
+            return (op.opname in LL_OPERATIONS and
+                    LL_OPERATIONS[op.opname].canmallocgc)  # per-operation flag
diff --git a/rpython/translator/c/gc.py b/rpython/translator/c/gc.py
--- a/rpython/translator/c/gc.py
+++ b/rpython/translator/c/gc.py
@@ -391,6 +391,34 @@
             raise AssertionError(subopnum)
         return ' '.join(parts)
 
+    def OP_GC_BIT(self, funcgen, op):
+        # This is a two-argument operation (x, y) where x is a
+        # pointer and y is a constant power of two.  It returns 0 if
+        # "((*(Signed*)x) & y) == 0", and non-zero if that value is "== y".
+        #
+        # On x86-64, emitting this is better than emitting a load
+        # followed by an INT_AND for the case where y doesn't fit in
+        # 32 bits.  I've seen situations where a register was wasted
+        # to contain the constant 2**32 throughout a complete messy
+        # function; the goal of this GC_BIT is to avoid that.
+        #
+        # Don't abuse, though.  If you need to check several bits in
+        # sequence, then it's likely better to load the whole Signed
+        # first; using GC_BIT would result in multiple accesses to
+        # memory.
+        #
+        bitmask = op.args[1].value
+        assert bitmask > 0 and (bitmask & (bitmask - 1)) == 0
+        offset = 0
+        while bitmask >= 0x100:    # reduce to a byte offset + 8-bit mask
+            offset += 1
+            bitmask >>= 8
+        if sys.byteorder == 'big':    # index the byte from the other end
+            offset = 'sizeof(Signed)-%s' % (offset+1)
+        return '%s = ((char *)%s)[%s] & %d;' % (funcgen.expr(op.result),
+                                                funcgen.expr(op.args[0]),
+                                                offset, bitmask)
+
 class ShadowStackFrameworkGcPolicy(BasicFrameworkGcPolicy):
 
     def gettransformer(self, translator):