[pypy-commit] pypy py3k: hg merge default

Thu Jun 2 12:26:02 EDT 2016

Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3k
Changeset: r84885:1de2e9ff0c99
Date: 2016-06-02 17:04 +0100
http://bitbucket.org/pypy/pypy/changeset/1de2e9ff0c99/

Log:	hg merge default

diff too long, truncating to 2000 out of 5903 lines

diff --git a/.hgtags b/.hgtags
--- a/.hgtags
+++ b/.hgtags
@@ -23,3 +23,5 @@
 3260adbeba4a8b6659d1cc0d0b41f266769b74da release-5.1
 b0a649e90b6642251fb4a765fe5b27a97b1319a9 release-5.1.1
 80ef432a32d9baa4b3c5a54c215e8ebe499f6374 release-5.1.2
+40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
+40497617ae91caa1a394d8be6f9cd2de31cb0628 release-pypy3.3-v5.2
diff --git a/lib-python/2.7/subprocess.py b/lib-python/2.7/subprocess.py
--- a/lib-python/2.7/subprocess.py
+++ b/lib-python/2.7/subprocess.py
@@ -834,54 +834,63 @@
             c2pread, c2pwrite = None, None
             errread, errwrite = None, None
 
+            ispread = False
             if stdin is None:
                 p2cread = _subprocess.GetStdHandle(_subprocess.STD_INPUT_HANDLE)
                 if p2cread is None:
                     p2cread, _ = _subprocess.CreatePipe(None, 0)
+                    ispread = True
             elif stdin == PIPE:
                 p2cread, p2cwrite = _subprocess.CreatePipe(None, 0)
+                ispread = True
             elif isinstance(stdin, int):
                 p2cread = msvcrt.get_osfhandle(stdin)
             else:
                 # Assuming file-like object
                 p2cread = msvcrt.get_osfhandle(stdin.fileno())
-            p2cread = self._make_inheritable(p2cread)
+            p2cread = self._make_inheritable(p2cread, ispread)
             # We just duplicated the handle, it has to be closed at the end
             to_close.add(p2cread)
             if stdin == PIPE:
                 to_close.add(p2cwrite)
 
+            ispwrite = False
             if stdout is None:
                 c2pwrite = _subprocess.GetStdHandle(_subprocess.STD_OUTPUT_HANDLE)
                 if c2pwrite is None:
                     _, c2pwrite = _subprocess.CreatePipe(None, 0)
+                    ispwrite = True
             elif stdout == PIPE:
                 c2pread, c2pwrite = _subprocess.CreatePipe(None, 0)
+                ispwrite = True
             elif isinstance(stdout, int):
                 c2pwrite = msvcrt.get_osfhandle(stdout)
             else:
                 # Assuming file-like object
                 c2pwrite = msvcrt.get_osfhandle(stdout.fileno())
-            c2pwrite = self._make_inheritable(c2pwrite)
+            c2pwrite = self._make_inheritable(c2pwrite, ispwrite)
             # We just duplicated the handle, it has to be closed at the end
             to_close.add(c2pwrite)
             if stdout == PIPE:
                 to_close.add(c2pread)
 
+            ispwrite = False
             if stderr is None:
                 errwrite = _subprocess.GetStdHandle(_subprocess.STD_ERROR_HANDLE)
                 if errwrite is None:
                     _, errwrite = _subprocess.CreatePipe(None, 0)
+                    ispwrite = True
             elif stderr == PIPE:
                 errread, errwrite = _subprocess.CreatePipe(None, 0)
+                ispwrite = True
             elif stderr == STDOUT:
-                errwrite = c2pwrite.handle # pass id to not close it
+                errwrite = c2pwrite
             elif isinstance(stderr, int):
                 errwrite = msvcrt.get_osfhandle(stderr)
             else:
                 # Assuming file-like object
                 errwrite = msvcrt.get_osfhandle(stderr.fileno())
-            errwrite = self._make_inheritable(errwrite)
+            errwrite = self._make_inheritable(errwrite, ispwrite)
             # We just duplicated the handle, it has to be closed at the end
             to_close.add(errwrite)
             if stderr == PIPE:
@@ -892,13 +901,14 @@
                     errread, errwrite), to_close
 
 
-        def _make_inheritable(self, handle):
+        def _make_inheritable(self, handle, close=False):
             """Return a duplicate of handle, which is inheritable"""
             dupl = _subprocess.DuplicateHandle(_subprocess.GetCurrentProcess(),
                                 handle, _subprocess.GetCurrentProcess(), 0, 1,
                                 _subprocess.DUPLICATE_SAME_ACCESS)
-            # If the initial handle was obtained with CreatePipe, close it.
-            if not isinstance(handle, int):
+            # PyPy: If the initial handle was obtained with CreatePipe,
+            # close it.
+            if close:
                 handle.Close()
             return dupl
 
diff --git a/lib_pypy/_pypy_irc_topic.py b/lib_pypy/_pypy_irc_topic.py
--- a/lib_pypy/_pypy_irc_topic.py
+++ b/lib_pypy/_pypy_irc_topic.py
@@ -224,23 +224,9 @@
 va ClCl orvat bayl zbqrengryl zntvp vf n tbbq guvat <psobym>
 """
 
-from string import ascii_uppercase, ascii_lowercase
-
 def rot13(data):
-    """ A simple rot-13 encoder since `str.encode('rot13')` was removed from
-        Python as of version 3.0.  It rotates both uppercase and lowercase letters individually.
-    """
-    total = []
-    for char in data:
-        if char in ascii_uppercase:
-            index = (ascii_uppercase.find(char) + 13) % 26
-            total.append(ascii_uppercase[index])
-        elif char in ascii_lowercase:
-            index = (ascii_lowercase.find(char) + 13) % 26
-            total.append(ascii_lowercase[index])
-        else:
-            total.append(char)
-    return "".join(total)
+    return ''.join(chr(ord(c)+(13 if 'A'<=c.upper()<='M' else
+                              -13 if 'N'<=c.upper()<='Z' else 0)) for c in data)
 
 def some_topic():
     import time
diff --git a/lib_pypy/pyrepl/simple_interact.py b/lib_pypy/pyrepl/simple_interact.py
--- a/lib_pypy/pyrepl/simple_interact.py
+++ b/lib_pypy/pyrepl/simple_interact.py
@@ -43,11 +43,13 @@
         return short
     return text
 
-def run_multiline_interactive_console(mainmodule=None):
+def run_multiline_interactive_console(mainmodule=None, future_flags=0):
     import code
     import __main__
     mainmodule = mainmodule or __main__
     console = code.InteractiveConsole(mainmodule.__dict__, filename='<stdin>')
+    if future_flags:
+        console.compile.compiler.flags |= future_flags
 
     def more_lines(unicodetext):
         # ooh, look at the hack:
diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst
--- a/pypy/doc/build.rst
+++ b/pypy/doc/build.rst
@@ -70,9 +70,6 @@
 bz2
     libbz2
 
-lzma (PyPy3 only)
-    liblzma
-
 pyexpat
     libexpat1
 
@@ -98,11 +95,16 @@
 tk
     tk-dev
 
+lzma (PyPy3 only)
+    liblzma
+
+To run untranslated tests, you need the Boehm garbage collector libgc.
+
 On Debian, this is the command to install all build-time dependencies::
 
     apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \
     libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \
-    tk-dev libgc-dev
+    tk-dev libgc-dev liblzma-dev
 
 For the optional lzma module on PyPy3 you will also need ``liblzma-dev``.
 
diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -49,6 +49,13 @@
    release-0.6
 
 
+CPython 3.3 compatible versions
+-------------------------------
+
+.. toctree::
+
+   release-pypy3.3-v5.2-alpha1.rst
+
 CPython 3.2 compatible versions
 -------------------------------
 
diff --git a/pypy/doc/release-pypy3.3-v5.2-alpha1.rst b/pypy/doc/release-pypy3.3-v5.2-alpha1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-pypy3.3-v5.2-alpha1.rst
@@ -0,0 +1,69 @@
+===================
+PyPy3 v5.2 alpha 1
+===================
+
+We're pleased to announce the first alpha release of PyPy3.3 v5.2. This is the
+first release of PyPy which targets Python 3.3 (3.3.5) compatibility.
+
+We would like to thank all of the people who donated_ to the `py3k proposal`_
+for supporting the work that went into this and future releases.
+
+You can download the PyPy3.3 v5.2 alpha 1 release here:
+
+    http://pypy.org/download.html#python-3-3-5-compatible-pypy3-3-v5-2
+
+Highlights
+==========
+
+* Python 3.3.5 support!
+
+  - Being an early alpha release, there are some `missing features`_ such as a
+    `PEP 393-like space efficient string representation`_ and `known issues`_
+    including performance regressions (e.g. issue `#2305`_). The focus for this
+    release has been updating to 3.3 compatibility. Windows is also not yet
+    supported.
+
+* `ensurepip`_ is also included (it's only included in CPython 3 >= 3.4).
+
+What is PyPy?
+==============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7.10 and one day 3.3.5. It's fast due to its integrated tracing JIT
+compiler.
+
+We also welcome developers of other `dynamic languages`_ to see what RPython
+can do for them.
+
+This release supports:
+
+  * **x86** machines on most common operating systems except Windows
+    (Linux 32/64, Mac OS X 64, OpenBSD, FreeBSD),
+
+  * newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux,
+
+  * big- and little-endian variants of **PPC64** running Linux,
+
+  * **s390x** running Linux
+
+Please try it out and let us know what you think. We welcome feedback, we know
+you are using PyPy, please tell us about it!
+
+We'd especially like to thank these people for their contributions to this
+release:
+
+Manuel Jacob, Ronan Lamy, Mark Young, Amaury Forgeot d'Arc, Philip Jenvey,
+Martin Matusiak, Vasily Kuznetsov, Matti Picus, Armin Rigo and many others.
+
+Cheers
+
+The PyPy Team
+
+.. _donated: http://morepypy.blogspot.com/2012/01/py3k-and-numpy-first-stage-thanks-to.html
+.. _`py3k proposal`: http://pypy.org/py3donate.html
+.. _`PEP 393-like space efficient string representation`: https://bitbucket.org/pypy/pypy/issues/2309/optimized-unicode-representation
+.. _`missing features`: https://bitbucket.org/pypy/pypy/issues?status=new&status=open&component=PyPy3+%28running+Python+3.x%29&kind=enhancement
+.. _`known issues`: https://bitbucket.org/pypy/pypy/issues?status=new&status=open&component=PyPy3%20%28running%20Python%203.x%29
+.. _`#2305`: https://bitbucket.org/pypy/pypy/issues/2305
+.. _`ensurepip`: https://docs.python.org/3/library/ensurepip.html#module-ensurepip
+.. _`dynamic languages`: http://pypyjs.org
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -105,3 +105,29 @@
 Fix some warnings when compiling CPython C extension modules
 
 .. branch: syntax_fix
+
+.. branch: remove-raisingops
+
+Remove most of the _ovf, _zer and _val operations from RPython.  Kills
+quite some code internally, and allows the JIT to do better
+optimizations: for example, app-level code like ``x / 2`` or ``x % 2``
+can now be turned into ``x >> 1`` or ``x & 1``, even if x is possibly
+negative.
+
+.. branch: cpyext-old-buffers
+
+Generalize cpyext old-style buffers to more than just str/buffer, add support for mmap
+
+.. branch: numpy-includes
+
+Move _numpypy headers into a directory so they are not picked up by upstream numpy, scipy
+This allows building upstream numpy and scipy in pypy via cpyext
+
+.. branch: traceviewer-common-merge-point-formats
+
+Teach RPython JIT's off-line traceviewer the most common ``debug_merge_point`` formats.
+
+.. branch: cpyext-pickle
+
+Enable pickling of W_PyCFunctionObject by monkeypatching pickle.Pickler.dispatch
+at cpyext import time
diff --git a/pypy/doc/whatsnew-pypy3-5.1.1-alpha1.rst b/pypy/doc/whatsnew-pypy3-5.1.1-alpha1.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/whatsnew-pypy3-5.1.1-alpha1.rst
@@ -0,0 +1,10 @@
+=================================
+What's new in PyPy3 5.1.1 alpha 1
+=================================
+
+.. A recent revision, ignoring all other branches for this release
+.. startrev: 29d14733e007
+
+.. branch: py3.3
+
+Python 3.3 compatibility
diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -238,6 +238,15 @@
 for use. The release packaging script will pick up the tcltk runtime in the lib
 directory and put it in the archive.
 
+The lzma compression library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Python 3.3 ship with CFFI wrappers for the lzma library, which can be
+downloaded from this site http://tukaani.org/xz. Python 3.3-3.5 use version
+5.0.5, a prebuilt version can be downloaded from
+http://tukaani.org/xz/xz-5.0.5-windows.zip, check the signature
+http://tukaani.org/xz/xz-5.0.5-windows.zip.sig
+
 
 Using the mingw compiler
 ------------------------
diff --git a/pypy/interpreter/test/test_app_main.py b/pypy/interpreter/test/test_app_main.py
--- a/pypy/interpreter/test/test_app_main.py
+++ b/pypy/interpreter/test/test_app_main.py
@@ -72,6 +72,11 @@
         print('Goodbye2')  # should not be reached
     """)
 
+script_with_future = getscript("""
+    from __future__ import division
+    from __future__ import print_function
+    """)
+
 
 @contextmanager
 def setpythonpath():
@@ -441,6 +446,31 @@
         finally:
             os.environ['PYTHONSTARTUP'] = old
 
+    def test_future_in_executed_script(self):
+        child = self.spawn(['-i', script_with_future])
+        child.expect('>>> ')
+        child.sendline('x=1; print(x/2, 3/4)')
+        child.expect('0.5 0.75')
+
+    def test_future_in_python_startup(self, monkeypatch):
+        monkeypatch.setenv('PYTHONSTARTUP', script_with_future)
+        child = self.spawn([])
+        child.expect('>>> ')
+        child.sendline('x=1; print(x/2, 3/4)')
+        child.expect('0.5 0.75')
+
+    def test_future_in_cmd(self):
+        child = self.spawn(['-i', '-c', 'from __future__ import division'])
+        child.expect('>>> ')
+        child.sendline('x=1; x/2; 3/4')
+        child.expect('0.5')
+        child.expect('0.75')
+
+    def test_cmd_co_name(self):
+        child = self.spawn(['-c',
+                    'import sys; print sys._getframe(0).f_code.co_name'])
+        child.expect('<module>')
+
     def test_ignore_python_inspect(self):
         os.environ['PYTHONINSPECT_'] = '1'
         try:
diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py
--- a/pypy/interpreter/typedef.py
+++ b/pypy/interpreter/typedef.py
@@ -12,7 +12,8 @@
 
 
 class TypeDef(object):
-    def __init__(self, __name, __base=None, __total_ordering__=None, **rawdict):
+    def __init__(self, __name, __base=None, __total_ordering__=None,
+                 __buffer=None, **rawdict):
         "NOT_RPYTHON: initialization-time only"
         self.name = __name
         if __base is None:
@@ -22,6 +23,8 @@
         else:
             bases = [__base]
         self.bases = bases
+        assert __buffer in {None, 'read-write', 'read'}, "Unknown value for __buffer"
+        self.buffer = __buffer
         self.heaptype = False
         self.hasdict = '__dict__' in rawdict
         # no __del__: use an RPython _finalize_() method and register_finalizer
diff --git a/pypy/module/__pypy__/interp_intop.py b/pypy/module/__pypy__/interp_intop.py
--- a/pypy/module/__pypy__/interp_intop.py
+++ b/pypy/module/__pypy__/interp_intop.py
@@ -2,6 +2,19 @@
 from rpython.rtyper.lltypesystem import lltype
 from rpython.rtyper.lltypesystem.lloperation import llop
 from rpython.rlib.rarithmetic import r_uint, intmask
+from rpython.rlib import jit
+
+
+# XXX maybe temporary: hide llop.int_{floordiv,mod} from the JIT,
+#     because now it expects only Python-style divisions, not the
+#     C-style divisions of these two ll operations
+ at jit.dont_look_inside
+def _int_floordiv(n, m):
+    return llop.int_floordiv(lltype.Signed, n, m)
+
+ at jit.dont_look_inside
+def _int_mod(n, m):
+    return llop.int_mod(lltype.Signed, n, m)
 
 
 @unwrap_spec(n=int, m=int)
@@ -18,11 +31,11 @@
 
 @unwrap_spec(n=int, m=int)
 def int_floordiv(space, n, m):
-    return space.wrap(llop.int_floordiv(lltype.Signed, n, m))
+    return space.wrap(_int_floordiv(n, m))
 
 @unwrap_spec(n=int, m=int)
 def int_mod(space, n, m):
-    return space.wrap(llop.int_mod(lltype.Signed, n, m))
+    return space.wrap(_int_mod(n, m))
 
 @unwrap_spec(n=int, m=int)
 def int_lshift(space, n, m):
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -1784,3 +1784,9 @@
         assert ffi.list_types() == (['CFFIb', 'CFFIbb', 'CFFIbbb'],
                                     ['CFFIa', 'CFFIcc', 'CFFIccc'],
                                     ['CFFIaa', 'CFFIaaa', 'CFFIg'])
+
+    def test_FFIFunctionWrapper(self):
+        ffi, lib = self.prepare("void f(void);", "test_FFIFunctionWrapper",
+                                "void f(void) { }")
+        assert lib.f.__get__(42) is lib.f
+        assert lib.f.__get__(42, int) is lib.f
diff --git a/pypy/module/_cffi_backend/wrapper.py b/pypy/module/_cffi_backend/wrapper.py
--- a/pypy/module/_cffi_backend/wrapper.py
+++ b/pypy/module/_cffi_backend/wrapper.py
@@ -100,6 +100,11 @@
         doc = '%s;\n\nCFFI C function from %s.lib' % (doc, self.modulename)
         return space.wrap(doc)
 
+    def descr_get(self, space, w_obj, w_type=None):
+        # never bind anything, but a __get__ is still present so that
+        # pydoc displays useful information (namely, the __repr__)
+        return self
+
 
 @jit.unroll_safe
 def prepare_args(space, rawfunctype, args_w, start_index):
@@ -136,5 +141,6 @@
         __name__ = interp_attrproperty('fnname', cls=W_FunctionWrapper),
         __module__ = interp_attrproperty('modulename', cls=W_FunctionWrapper),
         __doc__ = GetSetProperty(W_FunctionWrapper.descr_get_doc),
+        __get__ = interp2app(W_FunctionWrapper.descr_get),
         )
 W_FunctionWrapper.typedef.acceptable_as_base_class = False
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -1,4 +1,5 @@
 from pypy.interpreter.mixedmodule import MixedModule
+from pypy.interpreter import gateway
 from pypy.module.cpyext.state import State
 from pypy.module.cpyext import api
 
@@ -14,6 +15,12 @@
 
     def startup(self, space):
         space.fromcache(State).startup(space)
+        method = pypy.module.cpyext.typeobject.get_new_method_def(space)
+        w_obj = pypy.module.cpyext.methodobject.W_PyCFunctionObject(space, method, space.wrap(''))
+        space.appexec([space.type(w_obj)], """(methodtype):
+            from pickle import Pickler 
+            Pickler.dispatch[methodtype] = Pickler.save_global
+        """)
 
     def register_atexit(self, function):
         if len(self.atexit_funcs) >= 32:
@@ -64,6 +71,7 @@
 import pypy.module.cpyext.pyfile
 import pypy.module.cpyext.pystrtod
 import pypy.module.cpyext.pytraceback
+import pypy.module.cpyext.methodobject
 
 # now that all rffi_platform.Struct types are registered, configure them
 api.configure_types()
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -161,12 +161,13 @@
 
     if copy_numpy_headers:
         try:
-            dstdir.mkdir('numpy')
+            dstdir.mkdir('_numpypy')
+            dstdir.mkdir('_numpypy/numpy')
         except py.error.EEXIST:
             pass
-        numpy_dstdir = dstdir / 'numpy'
+        numpy_dstdir = dstdir / '_numpypy' / 'numpy'
 
-        numpy_include_dir = include_dir / 'numpy'
+        numpy_include_dir = include_dir / '_numpypy' / 'numpy'
         numpy_headers = numpy_include_dir.listdir('*.h') + numpy_include_dir.listdir('*.inl')
         _copy_header_files(numpy_headers, numpy_dstdir)
 
diff --git a/pypy/module/cpyext/include/numpy/README b/pypy/module/cpyext/include/_numpypy/numpy/README
rename from pypy/module/cpyext/include/numpy/README
rename to pypy/module/cpyext/include/_numpypy/numpy/README
diff --git a/pypy/module/cpyext/include/numpy/__multiarray_api.h b/pypy/module/cpyext/include/_numpypy/numpy/__multiarray_api.h
rename from pypy/module/cpyext/include/numpy/__multiarray_api.h
rename to pypy/module/cpyext/include/_numpypy/numpy/__multiarray_api.h
diff --git a/pypy/module/cpyext/include/numpy/arrayobject.h b/pypy/module/cpyext/include/_numpypy/numpy/arrayobject.h
rename from pypy/module/cpyext/include/numpy/arrayobject.h
rename to pypy/module/cpyext/include/_numpypy/numpy/arrayobject.h
diff --git a/pypy/module/cpyext/include/numpy/ndarraytypes.h b/pypy/module/cpyext/include/_numpypy/numpy/ndarraytypes.h
rename from pypy/module/cpyext/include/numpy/ndarraytypes.h
rename to pypy/module/cpyext/include/_numpypy/numpy/ndarraytypes.h
diff --git a/pypy/module/cpyext/include/numpy/npy_3kcompat.h b/pypy/module/cpyext/include/_numpypy/numpy/npy_3kcompat.h
rename from pypy/module/cpyext/include/numpy/npy_3kcompat.h
rename to pypy/module/cpyext/include/_numpypy/numpy/npy_3kcompat.h
diff --git a/pypy/module/cpyext/include/numpy/npy_common.h b/pypy/module/cpyext/include/_numpypy/numpy/npy_common.h
rename from pypy/module/cpyext/include/numpy/npy_common.h
rename to pypy/module/cpyext/include/_numpypy/numpy/npy_common.h
diff --git a/pypy/module/cpyext/include/numpy/old_defines.h b/pypy/module/cpyext/include/_numpypy/numpy/old_defines.h
rename from pypy/module/cpyext/include/numpy/old_defines.h
rename to pypy/module/cpyext/include/_numpypy/numpy/old_defines.h
diff --git a/pypy/module/cpyext/include/cStringIO.h b/pypy/module/cpyext/include/cStringIO.h
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/include/cStringIO.h
@@ -0,0 +1,73 @@
+#ifndef Py_CSTRINGIO_H
+#define Py_CSTRINGIO_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+
+  This header provides access to cStringIO objects from C.
+  Functions are provided for calling cStringIO objects and
+  macros are provided for testing whether you have cStringIO
+  objects.
+
+  Before calling any of the functions or macros, you must initialize
+  the routines with:
+
+    PycString_IMPORT
+
+  This would typically be done in your init function.
+
+*/
+
+#define PycStringIO_CAPSULE_NAME "cStringIO.cStringIO_CAPI"
+
+#define PycString_IMPORT \
+  PycStringIO = ((struct PycStringIO_CAPI*)PyCapsule_Import(\
+    PycStringIO_CAPSULE_NAME, 0))
+
+/* Basic functions to manipulate cStringIO objects from C */
+
+static struct PycStringIO_CAPI {
+
+ /* Read a string from an input object.  If the last argument
+    is -1, the remainder will be read.
+    */
+  int(*cread)(PyObject *, char **, Py_ssize_t);
+
+ /* Read a line from an input object.  Returns the length of the read
+    line as an int and a pointer inside the object buffer as char** (so
+    the caller doesn't have to provide its own buffer as destination).
+    */
+  int(*creadline)(PyObject *, char **);
+
+  /* Write a string to an output object*/
+  int(*cwrite)(PyObject *, const char *, Py_ssize_t);
+
+  /* Get the output object as a Python string (returns new reference). */
+  PyObject *(*cgetvalue)(PyObject *);
+
+  /* Create a new output object */
+  PyObject *(*NewOutput)(int);
+
+  /* Create an input object from a Python string
+     (copies the Python string reference).
+     */
+  PyObject *(*NewInput)(PyObject *);
+
+  /* The Python types for cStringIO input and output objects.
+     Note that you can do input on an output object.
+     */
+  PyTypeObject *InputType, *OutputType;
+
+} *PycStringIO;
+
+/* These can be used to test if you have one */
+#define PycStringIO_InputCheck(O) \
+  (0) /* Py_TYPE(O)==PycStringIO->InputType) */
+#define PycStringIO_OutputCheck(O) \
+  (0) /* Py_TYPE(O)==PycStringIO->OutputType) */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CSTRINGIO_H */
diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py
--- a/pypy/module/cpyext/methodobject.py
+++ b/pypy/module/cpyext/methodobject.py
@@ -44,8 +44,8 @@
                    dealloc=cfunction_dealloc)
 
 def cfunction_attach(space, py_obj, w_obj):
+    assert isinstance(w_obj, W_PyCFunctionObject)
     py_func = rffi.cast(PyCFunctionObject, py_obj)
-    assert isinstance(w_obj, W_PyCFunctionObject)
     py_func.c_m_ml = w_obj.ml
     py_func.c_m_self = make_ref(space, w_obj.w_self)
     py_func.c_m_module = make_ref(space, w_obj.w_module)
diff --git a/pypy/module/cpyext/src/ndarrayobject.c b/pypy/module/cpyext/src/ndarrayobject.c
--- a/pypy/module/cpyext/src/ndarrayobject.c
+++ b/pypy/module/cpyext/src/ndarrayobject.c
@@ -1,7 +1,7 @@
 
 #include "Python.h"
 #include "pypy_numpy.h"
-#include "numpy/arrayobject.h"
+#include "_numpypy/numpy/arrayobject.h"
 #include <string.h>   /* memset, memcpy */
 
 void 
diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -252,12 +252,16 @@
         lenp = lltype.malloc(Py_ssize_tP.TO, 1, flavor='raw')
 
         w_text = space.wrapbytes("text")
-        assert api.PyObject_AsCharBuffer(w_text, bufp, lenp) == 0
+        ref = make_ref(space, w_text)
+        prev_refcnt = ref.c_ob_refcnt
+        assert api.PyObject_AsCharBuffer(ref, bufp, lenp) == 0
+        assert ref.c_ob_refcnt == prev_refcnt
         assert lenp[0] == 4
         assert rffi.charp2str(bufp[0]) == 'text'
 
         lltype.free(bufp, flavor='raw')
         lltype.free(lenp, flavor='raw')
+        api.Py_DecRef(ref)
 
     def test_eq(self, space, api):
         assert 1 == api._PyBytes_Eq(space.wrapbytes("hello"), space.wrapbytes("hello"))
diff --git a/pypy/module/cpyext/test/test_ndarrayobject.py b/pypy/module/cpyext/test/test_ndarrayobject.py
--- a/pypy/module/cpyext/test/test_ndarrayobject.py
+++ b/pypy/module/cpyext/test/test_ndarrayobject.py
@@ -1,4 +1,5 @@
 import py
+import os
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 from rpython.rtyper.lltypesystem import rffi, lltype
@@ -238,8 +239,10 @@
             except:
                 skip('numpy not importable')
         else:
-            cls.w_numpy_include = cls.space.wrap([])
-
+            numpy_incl = os.path.abspath(os.path.dirname(__file__) + 
+                                         '/../include/_numpypy')
+            assert os.path.exists(numpy_incl)
+            cls.w_numpy_include = cls.space.wrap([numpy_incl])
 
     def test_ndarray_object_c(self):
         mod = self.import_extension('foo', [
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -125,7 +125,8 @@
         return None
 
     def issubtype_w(self, w_sub, w_type):
-        return w_sub is w_type
+        is_root(w_type)
+        return NonConstant(True)
 
     def isinstance_w(self, w_obj, w_tp):
         try:
@@ -414,6 +415,10 @@
     def warn(self, w_msg, w_warn_type):
         pass
 
+def is_root(w_obj):
+    assert isinstance(w_obj, W_Root)
+is_root.expecting = W_Root
+
 class FloatObject(W_Root):
     tp = FakeSpace.w_float
     def __init__(self, floatval):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_00_model.py b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_00_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_00_model.py
@@ -262,7 +262,7 @@
             [i0]
             i1 = int_add(i0, 1)
             i2 = int_sub(i1, 10)
-            i3 = int_floordiv(i2, 100)
+            i3 = int_xor(i2, 100)
             i4 = int_mul(i1, 1000)
             jump(i4)
         """
@@ -298,7 +298,7 @@
             [i0]
             i1 = int_add(i0, 1)
             i2 = int_sub(i1, 10)
-            i3 = int_floordiv(i2, 100)
+            i3 = int_xor(i2, 100)
             i4 = int_mul(i1, 1000)
             jump(i4)
         """
diff --git a/pypy/module/pypyjit/test_pypy_c/test_shift.py b/pypy/module/pypyjit/test_pypy_c/test_shift.py
--- a/pypy/module/pypyjit/test_pypy_c/test_shift.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_shift.py
@@ -47,26 +47,74 @@
             res = 0
             a = 0
             while a < 300:
-                assert a >= 0
-                assert 0 <= b <= 10
-                res = a/b     # ID: div
+                res1 = a/b     # ID: div
+                res2 = a/2     # ID: shift
+                res3 = a/11    # ID: mul
+                res += res1 + res2 + res3
                 a += 1
             return res
         #
         log = self.run(main, [3])
-        assert log.result == 99
+        assert log.result == main(3)
         loop, = log.loops_by_filename(self.filepath)
-        if sys.maxint == 2147483647:
-            SHIFT = 31
+        assert loop.match_by_id('div', """
+            i56 = int_eq(i48, %d)
+            i57 = int_and(i56, i37)
+            guard_false(i57, descr=...)
+            i1 = call_i(_, i48, i3, descr=...)
+        """ % (-sys.maxint-1,))
+        assert loop.match_by_id('shift', """
+            i1 = int_rshift(i2, 1)
+        """)
+        if sys.maxint > 2**32:
+            args = (63, -5030930201920786804, 3)
         else:
-            SHIFT = 63
-        assert loop.match_by_id('div', """
-            i10 = int_floordiv(i6, i7)
-            i11 = int_mul(i10, i7)
-            i12 = int_sub(i6, i11)
-            i14 = int_rshift(i12, %d)
-            i15 = int_add(i10, i14)
-        """ % SHIFT)
+            args = (31, -1171354717, 3)
+        assert loop.match_by_id('mul', """
+            i2 = int_rshift(i1, %d)
+            i3 = int_xor(i1, i2)
+            i4 = uint_mul_high(i3, %d)
+            i5 = uint_rshift(i4, %d)
+            i6 = int_xor(i5, i2)
+        """ % args)
+
+    def test_modulo_optimization(self):
+        def main(b):
+            res = 0
+            a = 0
+            while a < 300:
+                res1 = a%b     # ID: mod
+                res2 = a%2     # ID: and
+                res3 = a%11    # ID: mul
+                res += res1 + res2 + res3
+                a += 1
+            return res
+        #
+        log = self.run(main, [3])
+        assert log.result == main(3)
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match_by_id('mod', """
+            i56 = int_eq(i48, %d)
+            i57 = int_and(i56, i37)
+            guard_false(i57, descr=...)
+            i1 = call_i(_, i48, i3, descr=...)
+        """ % (-sys.maxint-1,))
+        assert loop.match_by_id('and', """
+            i1 = int_and(i2, 1)
+        """)
+        if sys.maxint > 2**32:
+            args = (63, -5030930201920786804, 3)
+        else:
+            args = (31, -1171354717, 3)
+        assert loop.match_by_id('mul', """
+            i2 = int_rshift(i1, %d)
+            i3 = int_xor(i1, i2)
+            i4 = uint_mul_high(i3, %d)
+            i5 = uint_rshift(i4, %d)
+            i6 = int_xor(i5, i2)
+            i7 = int_mul(i6, 11)
+            i8 = int_sub(i1, i7)
+        """ % args)
 
     def test_division_to_rshift_allcases(self):
         """
diff --git a/pypy/module/pypyjit/test_pypy_c/test_string.py b/pypy/module/pypyjit/test_pypy_c/test_string.py
--- a/pypy/module/pypyjit/test_pypy_c/test_string.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_string.py
@@ -1,11 +1,6 @@
 import sys
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
-if sys.maxint == 2147483647:
-    SHIFT = 31
-else:
-    SHIFT = 63
-
 # XXX review the <Call> descrs to replace some EF=5 with EF=4 (elidable)
 
 
@@ -28,10 +23,7 @@
             guard_true(i14, descr=...)
             guard_not_invalidated(descr=...)
             i16 = int_eq(i6, %d)
-            i15 = int_mod(i6, i10)
-            i17 = int_rshift(i15, %d)
-            i18 = int_and(i10, i17)
-            i19 = int_add(i15, i18)
+            i19 = call_i(ConstClass(ll_int_mod__Signed_Signed), i6, i10, descr=<Calli . ii EF=0 OS=14>)
             i21 = int_lt(i19, 0)
             guard_false(i21, descr=...)
             i22 = int_ge(i19, i10)
@@ -49,7 +41,7 @@
             i34 = int_add(i6, 1)
             --TICK--
             jump(..., descr=...)
-        """ % (-sys.maxint-1, SHIFT))
+        """ % (-sys.maxint-1,))
 
     def test_long(self):
         def main(n):
@@ -62,19 +54,25 @@
         log = self.run(main, [1100], import_site=True)
         assert log.result == main(1100)
         loop, = log.loops_by_filename(self.filepath)
+        if sys.maxint > 2**32:
+            args = (63, -3689348814741910323, 3)
+        else:
+            args = (31, -858993459, 3)
         assert loop.match("""
             i11 = int_lt(i6, i7)
             guard_true(i11, descr=...)
             guard_not_invalidated(descr=...)
             i13 = int_eq(i6, %d)         # value provided below
-            i15 = int_mod(i6, 10)
-            i17 = int_rshift(i15, %d)    # value provided below
-            i18 = int_and(10, i17)
-            i19 = int_add(i15, i18)
-            i21 = int_lt(i19, 0)
-            guard_false(i21, descr=...)
-            i22 = int_ge(i19, 10)
-            guard_false(i22, descr=...)
+
+            # "mod 10" block:
+            i79 = int_rshift(i6, %d)
+            i80 = int_xor(i6, i79)
+            i82 = uint_mul_high(i80, %d)
+            i84 = uint_rshift(i82, %d)
+            i85 = int_xor(i84, i79)
+            i87 = int_mul(i85, 10)
+            i19 = int_sub(i6, i87)
+
             i23 = strgetitem(p10, i19)
             p25 = newstr(1)
             strsetitem(p25, 0, i23)
@@ -89,7 +87,7 @@
             guard_no_overflow(descr=...)
             --TICK--
             jump(..., descr=...)
-        """ % (-sys.maxint-1, SHIFT))
+        """ % ((-sys.maxint-1,)+args))
 
     def test_str_mod(self):
         def main(n):
diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py
--- a/pypy/objspace/std/newformat.py
+++ b/pypy/objspace/std/newformat.py
@@ -7,6 +7,7 @@
 from rpython.rlib import rstring, runicode, rlocale, rfloat, jit
 from rpython.rlib.objectmodel import specialize
 from rpython.rlib.rfloat import copysign, formatd
+from rpython.rlib.rarithmetic import r_uint, intmask
 
 
 @specialize.argtype(1)
@@ -836,33 +837,37 @@
                 return s
             # This part is slow.
             negative = value < 0
-            value = abs(value)
+            base = r_uint(base)
+            value = r_uint(value)
+            if negative:   # change the sign on the unsigned number: otherwise,
+                value = -value   #   we'd risk overflow if value==-sys.maxint-1
+            #
             buf = ["\0"] * (8 * 8 + 6) # Too much on 32 bit, but who cares?
             i = len(buf) - 1
             while True:
-                div = value // base
-                mod = value - div * base
-                digit = abs(mod)
+                div = value // base         # unsigned
+                mod = value - div * base    # unsigned, always in range(0,base)
+                digit = intmask(mod)
                 digit += ord("0") if digit < 10 else ord("a") - 10
                 buf[i] = chr(digit)
-                value = div
+                value = div                 # unsigned
                 i -= 1
                 if not value:
                     break
-            if base == 2:
+            if base == r_uint(2):
                 buf[i] = "b"
                 buf[i - 1] = "0"
-            elif base == 8:
+            elif base == r_uint(8):
                 buf[i] = "o"
                 buf[i - 1] = "0"
-            elif base == 16:
+            elif base == r_uint(16):
                 buf[i] = "x"
                 buf[i - 1] = "0"
             else:
                 buf[i] = "#"
-                buf[i - 1] = chr(ord("0") + base % 10)
-                if base > 10:
-                    buf[i - 2] = chr(ord("0") + base // 10)
+                buf[i - 1] = chr(ord("0") + intmask(base % r_uint(10)))
+                if base > r_uint(10):
+                    buf[i - 2] = chr(ord("0") + intmask(base // r_uint(10)))
                     i -= 1
             i -= 1
             if negative:
diff --git a/pypy/tool/release/force-builds.py b/pypy/tool/release/force-builds.py
--- a/pypy/tool/release/force-builds.py
+++ b/pypy/tool/release/force-builds.py
@@ -19,16 +19,16 @@
 BUILDERS = [
     'own-linux-x86-32',
     'own-linux-x86-64',
-    'own-linux-armhf',
+#    'own-linux-armhf',
     'own-win-x86-32',
-    'own-linux-s390x-2',
+    'own-linux-s390x',
 #    'own-macosx-x86-32',
     'pypy-c-jit-linux-x86-32',
     'pypy-c-jit-linux-x86-64',
 #    'pypy-c-jit-freebsd-9-x86-64',
     'pypy-c-jit-macosx-x86-64',
     'pypy-c-jit-win-x86-32',
-    'pypy-c-jit-linux-s390x-2',
+    'pypy-c-jit-linux-s390x',
     'build-pypy-c-jit-linux-armhf-raring',
     'build-pypy-c-jit-linux-armhf-raspbian',
     'build-pypy-c-jit-linux-armel',
diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py
--- a/rpython/config/translationoption.py
+++ b/rpython/config/translationoption.py
@@ -213,11 +213,6 @@
                    default=False),
         BoolOption("merge_if_blocks", "Merge if ... elif chains",
                    cmdline="--if-block-merge", default=True),
-        BoolOption("raisingop2direct_call",
-                   "Transform operations that can implicitly raise an "
-                   "exception into calls to functions that explicitly "
-                   "raise exceptions",
-                   default=False, cmdline="--raisingop2direct_call"),
         BoolOption("mallocs", "Remove mallocs", default=True),
         BoolOption("constfold", "Constant propagation",
                    default=True),
diff --git a/rpython/jit/backend/arm/codebuilder.py b/rpython/jit/backend/arm/codebuilder.py
--- a/rpython/jit/backend/arm/codebuilder.py
+++ b/rpython/jit/backend/arm/codebuilder.py
@@ -1,6 +1,5 @@
 from rpython.jit.backend.arm import conditions as cond
 from rpython.jit.backend.arm import registers as reg
-from rpython.jit.backend.arm import support
 from rpython.jit.backend.arm.arch import WORD, PC_OFFSET
 from rpython.jit.backend.arm.instruction_builder import define_instructions
 from rpython.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
@@ -17,17 +16,6 @@
     sandboxsafe=True)
 
 
-def binary_helper_call(name):
-    function = getattr(support, 'arm_%s' % name)
-
-    def f(self, c=cond.AL):
-        """Generates a call to a helper function, takes its
-        arguments in r0 and r1, result is placed in r0"""
-        addr = rffi.cast(lltype.Signed, function)
-        self.BL(addr, c)
-    return f
-
-
 class AbstractARMBuilder(object):
     def __init__(self, arch_version=7):
         self.arch_version = arch_version
@@ -348,10 +336,6 @@
         self.write32(c << 28
                     | 0x157ff05f)
 
-    DIV = binary_helper_call('int_div')
-    MOD = binary_helper_call('int_mod')
-    UDIV = binary_helper_call('uint_div')
-
     FMDRR = VMOV_cr     # uh, there are synonyms?
     FMRRD = VMOV_rc
 
diff --git a/rpython/jit/backend/arm/helper/assembler.py b/rpython/jit/backend/arm/helper/assembler.py
--- a/rpython/jit/backend/arm/helper/assembler.py
+++ b/rpython/jit/backend/arm/helper/assembler.py
@@ -46,20 +46,6 @@
     f.__name__ = 'emit_op_%s' % name
     return f
 
-def gen_emit_op_by_helper_call(name, opname):
-    helper = getattr(InstrBuilder, opname)
-    def f(self, op, arglocs, regalloc, fcond):
-        assert fcond is not None
-        if op.type != 'v':
-            regs = r.caller_resp[1:] + [r.ip]
-        else:
-            regs = r.caller_resp
-        with saved_registers(self.mc, regs, r.caller_vfp_resp):
-            helper(self.mc, fcond)
-        return fcond
-    f.__name__ = 'emit_op_%s' % name
-    return f
-
 def gen_emit_cmp_op(name, true_cond):
     def f(self, op, arglocs, regalloc, fcond):
         l0, l1, res = arglocs
diff --git a/rpython/jit/backend/arm/helper/regalloc.py b/rpython/jit/backend/arm/helper/regalloc.py
--- a/rpython/jit/backend/arm/helper/regalloc.py
+++ b/rpython/jit/backend/arm/helper/regalloc.py
@@ -72,25 +72,6 @@
     res = self.force_allocate_reg_or_cc(op)
     return [loc1, loc2, res]
 
-def prepare_op_by_helper_call(name):
-    def f(self, op, fcond):
-        assert fcond is not None
-        a0 = op.getarg(0)
-        a1 = op.getarg(1)
-        arg1 = self.rm.make_sure_var_in_reg(a0, selected_reg=r.r0)
-        arg2 = self.rm.make_sure_var_in_reg(a1, selected_reg=r.r1)
-        assert arg1 == r.r0
-        assert arg2 == r.r1
-        if not isinstance(a0, Const) and self.stays_alive(a0):
-            self.force_spill_var(a0)
-        self.possibly_free_vars_for_op(op)
-        self.free_temp_vars()
-        self.after_call(op)
-        self.possibly_free_var(op)
-        return []
-    f.__name__ = name
-    return f
-
 def prepare_int_cmp(self, op, fcond):
     assert fcond is not None
     boxes = list(op.getarglist())
diff --git a/rpython/jit/backend/arm/opassembler.py b/rpython/jit/backend/arm/opassembler.py
--- a/rpython/jit/backend/arm/opassembler.py
+++ b/rpython/jit/backend/arm/opassembler.py
@@ -3,7 +3,7 @@
 from rpython.jit.backend.arm import registers as r
 from rpython.jit.backend.arm import shift
 from rpython.jit.backend.arm.arch import WORD, DOUBLE_WORD, JITFRAME_FIXED_SIZE
-from rpython.jit.backend.arm.helper.assembler import (gen_emit_op_by_helper_call,
+from rpython.jit.backend.arm.helper.assembler import (
                                                 gen_emit_op_unary_cmp,
                                                 gen_emit_op_ri,
                                                 gen_emit_cmp_op,
@@ -92,6 +92,11 @@
         self.mc.MUL(res.value, reg1.value, reg2.value)
         return fcond
 
+    def emit_op_uint_mul_high(self, op, arglocs, regalloc, fcond):
+        reg1, reg2, res = arglocs
+        self.mc.UMULL(r.ip.value, res.value, reg1.value, reg2.value)
+        return fcond
+
     def emit_op_int_force_ge_zero(self, op, arglocs, regalloc, fcond):
         arg, res = arglocs
         self.mc.CMP_ri(arg.value, 0)
@@ -132,10 +137,6 @@
         self.guard_success_cc = c.VC
         return fcond
 
-    emit_op_int_floordiv = gen_emit_op_by_helper_call('int_floordiv', 'DIV')
-    emit_op_int_mod = gen_emit_op_by_helper_call('int_mod', 'MOD')
-    emit_op_uint_floordiv = gen_emit_op_by_helper_call('uint_floordiv', 'UDIV')
-
     emit_op_int_and = gen_emit_op_ri('int_and', 'AND')
     emit_op_int_or = gen_emit_op_ri('int_or', 'ORR')
     emit_op_int_xor = gen_emit_op_ri('int_xor', 'EOR')
@@ -466,7 +467,11 @@
             assert saveerrloc.is_imm()
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
         return fcond
 
     def _genop_same_as(self, op, arglocs, regalloc, fcond):
diff --git a/rpython/jit/backend/arm/regalloc.py b/rpython/jit/backend/arm/regalloc.py
--- a/rpython/jit/backend/arm/regalloc.py
+++ b/rpython/jit/backend/arm/regalloc.py
@@ -7,7 +7,7 @@
 from rpython.jit.backend.arm import conditions as c
 from rpython.jit.backend.arm import locations
 from rpython.jit.backend.arm.locations import imm, get_fp_offset
-from rpython.jit.backend.arm.helper.regalloc import (prepare_op_by_helper_call,
+from rpython.jit.backend.arm.helper.regalloc import (
                                                     prepare_unary_cmp,
                                                     prepare_op_ri,
                                                     prepare_int_cmp,
@@ -397,9 +397,9 @@
         else:
             self.rm.force_spill_var(var)
 
-    def before_call(self, force_store=[], save_all_regs=False):
-        self.rm.before_call(force_store, save_all_regs)
-        self.vfprm.before_call(force_store, save_all_regs)
+    def before_call(self, save_all_regs=False):
+        self.rm.before_call(save_all_regs)
+        self.vfprm.before_call(save_all_regs)
 
     def _sync_var(self, v):
         if v.type == FLOAT:
@@ -467,6 +467,8 @@
         self.possibly_free_var(op)
         return [reg1, reg2, res]
 
+    prepare_op_uint_mul_high = prepare_op_int_mul
+
     def prepare_op_int_force_ge_zero(self, op, fcond):
         argloc = self.make_sure_var_in_reg(op.getarg(0))
         resloc = self.force_allocate_reg(op, [op.getarg(0)])
@@ -478,10 +480,6 @@
         resloc = self.force_allocate_reg(op)
         return [argloc, imm(numbytes), resloc]
 
-    prepare_op_int_floordiv = prepare_op_by_helper_call('int_floordiv')
-    prepare_op_int_mod = prepare_op_by_helper_call('int_mod')
-    prepare_op_uint_floordiv = prepare_op_by_helper_call('unit_floordiv')
-
     prepare_op_int_and = prepare_op_ri('int_and')
     prepare_op_int_or = prepare_op_ri('int_or')
     prepare_op_int_xor = prepare_op_ri('int_xor')
@@ -554,8 +552,7 @@
     prepare_op_call_f = _prepare_op_call
     prepare_op_call_n = _prepare_op_call
 
-    def _prepare_call(self, op, force_store=[], save_all_regs=False,
-                      first_arg_index=1):
+    def _prepare_call(self, op, save_all_regs=False, first_arg_index=1):
         args = [None] * (op.numargs() + 3)
         calldescr = op.getdescr()
         assert isinstance(calldescr, CallDescr)
@@ -573,17 +570,27 @@
         args[1] = imm(size)
         args[2] = sign_loc
 
-        args[0] = self._call(op, args, force_store, save_all_regs)
+        effectinfo = calldescr.get_extra_info()
+        if save_all_regs:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+
+        args[0] = self._call(op, args, gc_level)
         return args
 
-    def _call(self, op, arglocs, force_store=[], save_all_regs=False):
-        # spill variables that need to be saved around calls
-        self.vfprm.before_call(force_store, save_all_regs=save_all_regs)
-        if not save_all_regs:
-            gcrootmap = self.cpu.gc_ll_descr.gcrootmap
-            if gcrootmap and gcrootmap.is_shadow_stack:
-                save_all_regs = 2
-        self.rm.before_call(force_store, save_all_regs=save_all_regs)
+    def _call(self, op, arglocs, gc_level):
+        # spill variables that need to be saved around calls:
+        # gc_level == 0: callee cannot invoke the GC
+        # gc_level == 1: can invoke GC, save all regs that contain pointers
+        # gc_level == 2: can force, save all regs
+        save_all_regs = gc_level == 2
+        self.vfprm.before_call(save_all_regs=save_all_regs)
+        if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+            save_all_regs = 2
+        self.rm.before_call(save_all_regs=save_all_regs)
         resloc = self.after_call(op)
         return resloc
 
@@ -1070,7 +1077,7 @@
     def _prepare_op_call_assembler(self, op, fcond):
         locs = self.locs_for_call_assembler(op)
         tmploc = self.get_scratch_reg(INT, selected_reg=r.r0)
-        resloc = self._call(op, locs + [tmploc], save_all_regs=True)
+        resloc = self._call(op, locs + [tmploc], gc_level=2)
         return locs + [resloc, tmploc]
 
     prepare_op_call_assembler_i = _prepare_op_call_assembler
diff --git a/rpython/jit/backend/arm/support.py b/rpython/jit/backend/arm/support.py
deleted file mode 100644
--- a/rpython/jit/backend/arm/support.py
+++ /dev/null
@@ -1,54 +0,0 @@
-from rpython.rtyper.lltypesystem import lltype, rffi, llmemory
-from rpython.rlib.rarithmetic import r_uint
-from rpython.translator.tool.cbuild import ExternalCompilationInfo
-
-eci = ExternalCompilationInfo(post_include_bits=["""
-static int pypy__arm_int_div(int a, int b) {
-    return a/b;
-}
-static unsigned int pypy__arm_uint_div(unsigned int a, unsigned int b) {
-    return a/b;
-}
-static int pypy__arm_int_mod(int a, int b) {
-    return a % b;
-}
-"""])
-
-
-def arm_int_div_emulator(a, b):
-    return int(a / float(b))
-arm_int_div_sign = lltype.Ptr(
-        lltype.FuncType([lltype.Signed, lltype.Signed], lltype.Signed))
-arm_int_div = rffi.llexternal(
-    "pypy__arm_int_div", [lltype.Signed, lltype.Signed], lltype.Signed,
-                        _callable=arm_int_div_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
-
-
-def arm_uint_div_emulator(a, b):
-    return r_uint(a) / r_uint(b)
-arm_uint_div_sign = lltype.Ptr(
-        lltype.FuncType([lltype.Unsigned, lltype.Unsigned], lltype.Unsigned))
-arm_uint_div = rffi.llexternal(
-    "pypy__arm_uint_div", [lltype.Unsigned, lltype.Unsigned], lltype.Unsigned,
-                        _callable=arm_uint_div_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
-
-
-def arm_int_mod_emulator(a, b):
-    sign = 1
-    if a < 0:
-        a = -1 * a
-        sign = -1
-    if b < 0:
-        b = -1 * b
-    res = a % b
-    return sign * res
-arm_int_mod_sign = arm_int_div_sign
-arm_int_mod = rffi.llexternal(
-    "pypy__arm_int_mod", [lltype.Signed, lltype.Signed], lltype.Signed,
-                        _callable=arm_int_mod_emulator,
-                        compilation_info=eci,
-                        _nowrapper=True, elidable_function=True)
diff --git a/rpython/jit/backend/arm/test/test_arch.py b/rpython/jit/backend/arm/test/test_arch.py
deleted file mode 100644
--- a/rpython/jit/backend/arm/test/test_arch.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from rpython.jit.backend.arm import support
-
-def test_mod():
-    assert support.arm_int_mod(10, 2) == 0
-    assert support.arm_int_mod(11, 2) == 1
-    assert support.arm_int_mod(11, 3) == 2
-
-def test_mod2():
-    assert support.arm_int_mod(-10, 2) == 0
-    assert support.arm_int_mod(-11, 2) == -1
-    assert support.arm_int_mod(-11, 3) == -2
-
-def test_mod3():
-    assert support.arm_int_mod(10, -2) == 0
-    assert support.arm_int_mod(11, -2) == 1
-    assert support.arm_int_mod(11, -3) == 2
-
-
-def test_div():
-    assert support.arm_int_div(-7, 2) == -3
-    assert support.arm_int_div(9, 2) == 4
-    assert support.arm_int_div(10, 5) == 2
-
diff --git a/rpython/jit/backend/arm/test/test_assembler.py b/rpython/jit/backend/arm/test/test_assembler.py
--- a/rpython/jit/backend/arm/test/test_assembler.py
+++ b/rpython/jit/backend/arm/test/test_assembler.py
@@ -193,32 +193,6 @@
         self.a.gen_func_epilog()
         assert run_asm(self.a) == 61
 
-    def test_DIV(self):
-        self.a.gen_func_prolog()
-        self.a.mc.MOV_ri(r.r0.value, 123)
-        self.a.mc.MOV_ri(r.r1.value, 2)
-        self.a.mc.DIV()
-        self.a.gen_func_epilog()
-        assert run_asm(self.a) == 61
-
-    def test_DIV2(self):
-        self.a.gen_func_prolog()
-        self.a.mc.gen_load_int(r.r0.value, -110)
-        self.a.mc.gen_load_int(r.r1.value, 3)
-        self.a.mc.DIV()
-        self.a.gen_func_epilog()
-        assert run_asm(self.a) == -36
-
-    def test_DIV3(self):
-        self.a.gen_func_prolog()
-        self.a.mc.gen_load_int(r.r8.value, 110)
-        self.a.mc.gen_load_int(r.r9.value, -3)
-        self.a.mc.MOV_rr(r.r0.value, r.r8.value)
-        self.a.mc.MOV_rr(r.r1.value, r.r9.value)
-        self.a.mc.DIV()
-        self.a.gen_func_epilog()
-        assert run_asm(self.a) == -36
-
     def test_bl_with_conditional_exec(self):
         functype = lltype.Ptr(lltype.FuncType([lltype.Signed], lltype.Signed))
         call_addr = rffi.cast(lltype.Signed, llhelper(functype, callme))
diff --git a/rpython/jit/backend/llsupport/regalloc.py b/rpython/jit/backend/llsupport/regalloc.py
--- a/rpython/jit/backend/llsupport/regalloc.py
+++ b/rpython/jit/backend/llsupport/regalloc.py
@@ -574,27 +574,113 @@
             self.assembler.regalloc_mov(reg, to)
         # otherwise it's clean
 
+    def _bc_spill(self, v, new_free_regs):
+        self._sync_var(v)
+        new_free_regs.append(self.reg_bindings.pop(v))
+
     def before_call(self, force_store=[], save_all_regs=0):
-        """ Spill registers before a call, as described by
-        'self.save_around_call_regs'.  Registers are not spilled if
-        they don't survive past the current operation, unless they
-        are listed in 'force_store'.  'save_all_regs' can be 0 (default),
-        1 (save all), or 2 (save default+PTRs).
+        """Spill or move some registers before a call.  By default,
+        this means: for every register in 'self.save_around_call_regs',
+        if there is a variable there and it survives longer than
+        the current operation, then it is spilled/moved somewhere else.
+
+        'save_all_regs' can be 0 (default set of registers), 1 (do that
+        for all registers), or 2 (default + gc ptrs).
+
+        Overview of what we do (the implementation does it differently,
+        for the same result):
+
+        * we first check the set of registers that are free: call it F.
+
+        * possibly_free_vars() is implied for all variables (except
+          the ones listed in force_store): if they don't survive past
+          the current operation, they are forgotten now.  (Their
+          register remain not in F, because they are typically
+          arguments to the call, so they should not be overwritten by
+          the next step.)
+
+        * then for every variable that needs to be spilled/moved: if
+          there is an entry in F that is acceptable, pick it and emit a
+          move.  Otherwise, emit a spill.  Start doing this with the
+          variables that survive the shortest time, to give them a
+          better change to remain in a register---similar algo as
+          _pick_variable_to_spill().
+
+        Note: when a register is moved, it often (but not always) means
+        we could have been more clever and picked a better register in
+        the first place, when we did so earlier.  It is done this way
+        anyway, as a local hack in this function, because on x86 CPUs
+        such register-register moves are almost free.
         """
+        new_free_regs = []
+        move_or_spill = []
+
         for v, reg in self.reg_bindings.items():
-            if v not in force_store and self.longevity[v][1] <= self.position:
+            max_age = self.longevity[v][1]
+            if v not in force_store and max_age <= self.position:
                 # variable dies
                 del self.reg_bindings[v]
-                self.free_regs.append(reg)
+                new_free_regs.append(reg)
                 continue
-            if save_all_regs != 1 and reg not in self.save_around_call_regs:
-                if save_all_regs == 0:
-                    continue    # we don't have to
-                if v.type != REF:
-                    continue    # only save GC pointers
-            self._sync_var(v)
-            del self.reg_bindings[v]
-            self.free_regs.append(reg)
+
+            if save_all_regs == 1:
+                # we need to spill all registers in this mode
+                self._bc_spill(v, new_free_regs)
+                #
+            elif save_all_regs == 2 and v.type == REF:
+                # we need to spill all GC ptrs in this mode
+                self._bc_spill(v, new_free_regs)
+                #
+            elif reg not in self.save_around_call_regs:
+                continue  # in a register like ebx/rbx: it is fine where it is
+                #
+            else:
+                # this is a register like eax/rax, which needs either
+                # spilling or moving.
+                move_or_spill.append((v, max_age))
+
+        if len(move_or_spill) > 0:
+            while len(self.free_regs) > 0:
+                new_reg = self.free_regs.pop()
+                if new_reg in self.save_around_call_regs:
+                    new_free_regs.append(new_reg)    # not this register...
+                    continue
+                # This 'new_reg' is suitable for moving a candidate to.
+                # Pick the one with the smallest max_age.  (This
+                # is one step of a naive sorting algo, slow in theory,
+                # but the list should always be very small so it
+                # doesn't matter.)
+                best_i = 0
+                smallest_max_age = move_or_spill[0][1]
+                for i in range(1, len(move_or_spill)):
+                    max_age = move_or_spill[i][1]
+                    if max_age < smallest_max_age:
+                        best_i = i
+                        smallest_max_age = max_age
+                v, max_age = move_or_spill.pop(best_i)
+                # move from 'reg' to 'new_reg'
+                reg = self.reg_bindings[v]
+                if not we_are_translated():
+                    if move_or_spill:
+                        assert max_age <= min([_a for _, _a in move_or_spill])
+                    assert reg in self.save_around_call_regs
+                    assert new_reg not in self.save_around_call_regs
+                self.assembler.regalloc_mov(reg, new_reg)
+                self.reg_bindings[v] = new_reg    # change the binding
+                new_free_regs.append(reg)
+                #
+                if len(move_or_spill) == 0:
+                    break
+            else:
+                # no more free registers to move to, spill the rest
+                for v, max_age in move_or_spill:
+                    self._bc_spill(v, new_free_regs)
+
+        # re-add registers in 'new_free_regs', but in reverse order,
+        # so that the last ones (added just above, from
+        # save_around_call_regs) are picked last by future '.pop()'
+        while len(new_free_regs) > 0:
+            self.free_regs.append(new_free_regs.pop())
 
     def after_call(self, v):
         """ Adjust registers according to the result of the call,
diff --git a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
--- a/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
+++ b/rpython/jit/backend/llsupport/test/test_regalloc_integration.py
@@ -496,22 +496,6 @@
         self.interpret(ops, [s, ord('a')])
         assert s[1] == 'a'
 
-    def test_division_optimized(self):
-        ops = '''
-        [i7, i6]
-        label(i7, i6, descr=targettoken)
-        i18 = int_floordiv(i7, i6)
-        i19 = int_xor(i7, i6)
-        i21 = int_lt(i19, 0)
-        i22 = int_mod(i7, i6)
-        i23 = int_is_true(i22)
-        i24 = int_eq(i6, 4)
-        guard_false(i24) [i18]
-        jump(i18, i6, descr=targettoken)
-        '''
-        self.interpret(ops, [10, 4])
-        assert self.getint(0) == 2
-        # FIXME: Verify that i19 - i23 are removed
 
 class TestRegallocFloats(BaseTestRegalloc):
     def setup_class(cls):
diff --git a/rpython/jit/backend/ppc/opassembler.py b/rpython/jit/backend/ppc/opassembler.py
--- a/rpython/jit/backend/ppc/opassembler.py
+++ b/rpython/jit/backend/ppc/opassembler.py
@@ -62,6 +62,12 @@
         else:
             self.mc.mulld(res.value, l0.value, l1.value)
 
+    def emit_uint_mul_high(self, op, arglocs, regalloc):
+        l0, l1, res = arglocs
+        assert not l0.is_imm()
+        assert not l1.is_imm()
+        self.mc.mulhdu(res.value, l0.value, l1.value)
+
     def do_emit_int_binary_ovf(self, op, arglocs):
         l0, l1, res = arglocs[0], arglocs[1], arglocs[2]
         self.mc.load_imm(r.SCRATCH, 0)
@@ -80,24 +86,6 @@
         else:
             self.mc.mulldox(*self.do_emit_int_binary_ovf(op, arglocs))
 
-    def emit_int_floordiv(self, op, arglocs, regalloc):
-        l0, l1, res = arglocs
-        if IS_PPC_32:
-            self.mc.divw(res.value, l0.value, l1.value)
-        else:
-            self.mc.divd(res.value, l0.value, l1.value)
-
-    def emit_int_mod(self, op, arglocs, regalloc):
-        l0, l1, res = arglocs
-        if IS_PPC_32:
-            self.mc.divw(r.r0.value, l0.value, l1.value)
-            self.mc.mullw(r.r0.value, r.r0.value, l1.value)
-        else:
-            self.mc.divd(r.r0.value, l0.value, l1.value)
-            self.mc.mulld(r.r0.value, r.r0.value, l1.value)
-        self.mc.subf(r.r0.value, r.r0.value, l0.value)
-        self.mc.mr(res.value, r.r0.value)
-
     def emit_int_and(self, op, arglocs, regalloc):
         l0, l1, res = arglocs
         self.mc.and_(res.value, l0.value, l1.value)
@@ -130,13 +118,6 @@
             self.mc.srw(res.value, l0.value, l1.value)
         else:
             self.mc.srd(res.value, l0.value, l1.value)
-    
-    def emit_uint_floordiv(self, op, arglocs, regalloc):
-        l0, l1, res = arglocs
-        if IS_PPC_32:
-            self.mc.divwu(res.value, l0.value, l1.value)
-        else:
-            self.mc.divdu(res.value, l0.value, l1.value)
 
     emit_int_le = gen_emit_cmp_op(c.LE)
     emit_int_lt = gen_emit_cmp_op(c.LT)
@@ -622,7 +603,11 @@
             assert saveerrloc.is_imm()
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
 
     def _genop_call(self, op, arglocs, regalloc):
         oopspecindex = regalloc.get_oopspecindex(op)
diff --git a/rpython/jit/backend/ppc/regalloc.py b/rpython/jit/backend/ppc/regalloc.py
--- a/rpython/jit/backend/ppc/regalloc.py
+++ b/rpython/jit/backend/ppc/regalloc.py
@@ -1,6 +1,7 @@
 from rpython.jit.backend.llsupport.regalloc import (RegisterManager, FrameManager,
                                                     TempVar, compute_vars_longevity,
                                                     BaseRegalloc)
+from rpython.jit.backend.llsupport.descr import CallDescr
 from rpython.jit.backend.ppc.arch import (WORD, MY_COPY_OF_REGS, IS_PPC_32)
 from rpython.jit.codewriter import longlong
 from rpython.jit.backend.ppc.jump import (remap_frame_layout,
@@ -369,9 +370,9 @@
         # This operation is used only for testing
         self.force_spill_var(op.getarg(0))
 
-    def before_call(self, force_store=[], save_all_regs=False):
-        self.rm.before_call(force_store, save_all_regs)
-        self.fprm.before_call(force_store, save_all_regs)
+    def before_call(self, save_all_regs=False):
+        self.rm.before_call(save_all_regs)
+        self.fprm.before_call(save_all_regs)
 
     def after_call(self, v):
         if v.type == FLOAT:
@@ -432,15 +433,13 @@
     prepare_int_mul = helper.prepare_int_add_or_mul
     prepare_nursery_ptr_increment = prepare_int_add
 
-    prepare_int_floordiv = helper.prepare_binary_op
-    prepare_int_mod = helper.prepare_binary_op
     prepare_int_and = helper.prepare_binary_op
     prepare_int_or = helper.prepare_binary_op
     prepare_int_xor = helper.prepare_binary_op
     prepare_int_lshift = helper.prepare_binary_op
     prepare_int_rshift = helper.prepare_binary_op
     prepare_uint_rshift = helper.prepare_binary_op
-    prepare_uint_floordiv = helper.prepare_binary_op
+    prepare_uint_mul_high = helper.prepare_binary_op
 
     prepare_int_add_ovf = helper.prepare_binary_op
     prepare_int_sub_ovf = helper.prepare_binary_op
@@ -758,7 +757,7 @@
         src_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(2))
         dst_ofs_loc = self.ensure_reg_or_any_imm(op.getarg(3))
         length_loc  = self.ensure_reg_or_any_imm(op.getarg(4))
-        self._spill_before_call(save_all_regs=False)
+        self._spill_before_call(gc_level=0)
         return [src_ptr_loc, dst_ptr_loc,
                 src_ofs_loc, dst_ofs_loc, length_loc]
 
@@ -791,13 +790,15 @@
     prepare_call_f = _prepare_call
     prepare_call_n = _prepare_call
 
-    def _spill_before_call(self, save_all_regs=False):
-        # spill variables that need to be saved around calls
+    def _spill_before_call(self, gc_level):
+        # spill variables that need to be saved around calls:
+        # gc_level == 0: callee cannot invoke the GC
+        # gc_level == 1: can invoke GC, save all regs that contain pointers
+        # gc_level == 2: can force, save all regs
+        save_all_regs = gc_level == 2
         self.fprm.before_call(save_all_regs=save_all_regs)
-        if not save_all_regs:
-            gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
-            if gcrootmap and gcrootmap.is_shadow_stack:
-                save_all_regs = 2
+        if gc_level == 1 and self.cpu.gc_ll_descr.gcrootmap:
+            save_all_regs = 2
         self.rm.before_call(save_all_regs=save_all_regs)
 
     def _prepare_call(self, op, save_all_regs=False):
@@ -805,7 +806,18 @@
         args.append(None)
         for i in range(op.numargs()):
             args.append(self.loc(op.getarg(i)))
-        self._spill_before_call(save_all_regs)
+
+        calldescr = op.getdescr()
+        assert isinstance(calldescr, CallDescr)
+        effectinfo = calldescr.get_extra_info()
+        if save_all_regs:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+        self._spill_before_call(gc_level=gc_level)
+
         if op.type != VOID:
             resloc = self.after_call(op)
             args[0] = resloc
@@ -934,7 +946,7 @@
 
     def _prepare_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
-        self._spill_before_call(save_all_regs=True)
+        self._spill_before_call(gc_level=2)
         if op.type != VOID:
             resloc = self.after_call(op)
         else:
diff --git a/rpython/jit/backend/test/test_random.py b/rpython/jit/backend/test/test_random.py
--- a/rpython/jit/backend/test/test_random.py
+++ b/rpython/jit/backend/test/test_random.py
@@ -532,6 +532,7 @@
             rop.INT_AND,
             rop.INT_OR,
             rop.INT_XOR,
+            rop.UINT_MUL_HIGH,
             ]:
     OPERATIONS.append(BinaryOperation(_op))
 
@@ -548,8 +549,8 @@
             ]:
     OPERATIONS.append(BinaryOperation(_op, boolres=True))
 
-OPERATIONS.append(BinaryOperation(rop.INT_FLOORDIV, ~3, 2))
-OPERATIONS.append(BinaryOperation(rop.INT_MOD, ~3, 2))
+#OPERATIONS.append(BinaryOperation(rop.INT_FLOORDIV, ~3, 2))
+#OPERATIONS.append(BinaryOperation(rop.INT_MOD, ~3, 2))
 OPERATIONS.append(BinaryOperation(rop.INT_RSHIFT, LONG_BIT-1))
 OPERATIONS.append(BinaryOperation(rop.INT_LSHIFT, LONG_BIT-1))
 OPERATIONS.append(BinaryOperation(rop.UINT_RSHIFT, LONG_BIT-1))
diff --git a/rpython/jit/backend/x86/assembler.py b/rpython/jit/backend/x86/assembler.py
--- a/rpython/jit/backend/x86/assembler.py
+++ b/rpython/jit/backend/x86/assembler.py
@@ -1289,6 +1289,9 @@
     genop_float_mul = _binaryop('MULSD')
     genop_float_truediv = _binaryop('DIVSD')
 
+    def genop_uint_mul_high(self, op, arglocs, result_loc):
+        self.mc.MUL(arglocs[0])
+
     def genop_int_and(self, op, arglocs, result_loc):
         arg1 = arglocs[1]
         if IS_X86_64 and (isinstance(arg1, ImmedLoc) and
@@ -1444,20 +1447,6 @@
         self.mov(imm0, resloc)
         self.mc.CMOVNS(resloc, arglocs[0])
 
-    def genop_int_mod(self, op, arglocs, resloc):
-        if IS_X86_32:
-            self.mc.CDQ()
-        elif IS_X86_64:
-            self.mc.CQO()
-
-        self.mc.IDIV_r(ecx.value)
-
-    genop_int_floordiv = genop_int_mod
-
-    def genop_uint_floordiv(self, op, arglocs, resloc):
-        self.mc.XOR_rr(edx.value, edx.value)
-        self.mc.DIV_r(ecx.value)
-
     genop_llong_add = _binaryop("PADDQ")
     genop_llong_sub = _binaryop("PSUBQ")
     genop_llong_and = _binaryop("PAND")
@@ -2123,7 +2112,11 @@
             assert isinstance(saveerrloc, ImmedLoc)
             cb.emit_call_release_gil(saveerrloc.value)
         else:
-            cb.emit()
+            effectinfo = descr.get_extra_info()
+            if effectinfo is None or effectinfo.check_can_collect():
+                cb.emit()
+            else:
+                cb.emit_no_collect()
 
     def _store_force_index(self, guard_op):
         assert (guard_op.getopnum() == rop.GUARD_NOT_FORCED or
diff --git a/rpython/jit/backend/x86/regalloc.py b/rpython/jit/backend/x86/regalloc.py
--- a/rpython/jit/backend/x86/regalloc.py
+++ b/rpython/jit/backend/x86/regalloc.py
@@ -561,6 +561,27 @@
     consider_int_sub_ovf = _consider_binop
     consider_int_add_ovf = _consider_binop_symm
 
+    def consider_uint_mul_high(self, op):
+        arg1, arg2 = op.getarglist()
+        # should support all cases, but is optimized for (box, const)
+        if isinstance(arg1, Const):
+            arg1, arg2 = arg2, arg1
+        self.rm.make_sure_var_in_reg(arg2, selected_reg=eax)
+        l1 = self.loc(arg1)
+        # l1 is a register != eax, or stack_bp; or, just possibly, it
+        # can be == eax if arg1 is arg2
+        assert not isinstance(l1, ImmedLoc)
+        assert l1 is not eax or arg1 is arg2
+        #
+        # eax will be trash after the operation
+        self.rm.possibly_free_var(arg2)
+        tmpvar = TempVar()
+        self.rm.force_allocate_reg(tmpvar, selected_reg=eax)
+        self.rm.possibly_free_var(tmpvar)
+        #
+        self.rm.force_allocate_reg(op, selected_reg=edx)
+        self.perform(op, [l1], edx)
+
     def consider_int_neg(self, op):
         res = self.rm.force_result_in_reg(op, op.getarg(0))
         self.perform(op, [res], res)
@@ -585,29 +606,6 @@
     consider_int_rshift  = consider_int_lshift
     consider_uint_rshift = consider_int_lshift
 
-    def _consider_int_div_or_mod(self, op, resultreg, trashreg):
-        l0 = self.rm.make_sure_var_in_reg(op.getarg(0), selected_reg=eax)
-        l1 = self.rm.make_sure_var_in_reg(op.getarg(1), selected_reg=ecx)
-        l2 = self.rm.force_allocate_reg(op, selected_reg=resultreg)
-        # the register (eax or edx) not holding what we are looking for
-        # will be just trash after that operation
-        tmpvar = TempVar()
-        self.rm.force_allocate_reg(tmpvar, selected_reg=trashreg)
-        assert l0 is eax
-        assert l1 is ecx
-        assert l2 is resultreg
-        self.rm.possibly_free_var(tmpvar)
-
-    def consider_int_mod(self, op):
-        self._consider_int_div_or_mod(op, edx, eax)
-        self.perform(op, [eax, ecx], edx)
-
-    def consider_int_floordiv(self, op):
-        self._consider_int_div_or_mod(op, eax, edx)
-        self.perform(op, [eax, ecx], eax)
-
-    consider_uint_floordiv = consider_int_floordiv
-
     def _consider_compop(self, op):
         vx = op.getarg(0)
         vy = op.getarg(1)
@@ -797,22 +795,22 @@
         else:
             self._consider_call(op)
 
-    def _call(self, op, arglocs, force_store=[], guard_not_forced=False):
+    def _call(self, op, arglocs, gc_level):
         # we need to save registers on the stack:
         #
         #  - at least the non-callee-saved registers
         #
-        #  - we assume that any call can collect, and we
-        #    save also the callee-saved registers that contain GC pointers
+        #  - if gc_level > 0, we save also the callee-saved registers that
+        #    contain GC pointers
         #
-        #  - for CALL_MAY_FORCE or CALL_ASSEMBLER, we have to save all regs
-        #    anyway, in case we need to do cpu.force().  The issue is that
-        #    grab_frame_values() would not be able to locate values in
-        #    callee-saved registers.
+        #  - gc_level == 2 for CALL_MAY_FORCE or CALL_ASSEMBLER.  We
+        #    have to save all regs anyway, in case we need to do
+        #    cpu.force().  The issue is that grab_frame_values() would
+        #    not be able to locate values in callee-saved registers.
         #
-        save_all_regs = guard_not_forced
-        self.xrm.before_call(force_store, save_all_regs=save_all_regs)
-        if not save_all_regs:
+        save_all_regs = gc_level == 2
+        self.xrm.before_call(save_all_regs=save_all_regs)
+        if gc_level == 1:
             gcrootmap = self.assembler.cpu.gc_ll_descr.gcrootmap
             # we save all the registers for shadowstack and asmgcc for now
             # --- for asmgcc too: we can't say "register x is a gc ref"
@@ -820,7 +818,7 @@
             # more for now.
             if gcrootmap: # and gcrootmap.is_shadow_stack:
                 save_all_regs = 2
-        self.rm.before_call(force_store, save_all_regs=save_all_regs)
+        self.rm.before_call(save_all_regs=save_all_regs)
         if op.type != 'v':
             if op.type == FLOAT:
                 resloc = self.xrm.after_call(op)
@@ -840,9 +838,18 @@
             sign_loc = imm1
         else:
             sign_loc = imm0
+        #
+        effectinfo = calldescr.get_extra_info()
+        if guard_not_forced:
+            gc_level = 2
+        elif effectinfo is None or effectinfo.check_can_collect():
+            gc_level = 1
+        else:
+            gc_level = 0
+        #
         self._call(op, [imm(size), sign_loc] +
                        [self.loc(op.getarg(i)) for i in range(op.numargs())],
-                   guard_not_forced=guard_not_forced)
+                   gc_level=gc_level)
 
     def _consider_real_call(self, op):
         effectinfo = op.getdescr().get_extra_info()
@@ -901,7 +908,7 @@
 
     def _consider_call_assembler(self, op):
         locs = self.locs_for_call_assembler(op)
-        self._call(op, locs, guard_not_forced=True)
+        self._call(op, locs, gc_level=2)
     consider_call_assembler_i = _consider_call_assembler
     consider_call_assembler_r = _consider_call_assembler
     consider_call_assembler_f = _consider_call_assembler
diff --git a/rpython/jit/backend/x86/regloc.py b/rpython/jit/backend/x86/regloc.py
--- a/rpython/jit/backend/x86/regloc.py
+++ b/rpython/jit/backend/x86/regloc.py
@@ -641,6 +641,7 @@
     SUB = _binaryop('SUB')
     IMUL = _binaryop('IMUL')
     NEG = _unaryop('NEG')
+    MUL = _unaryop('MUL')
 
     CMP = _binaryop('CMP')
     CMP16 = _binaryop('CMP16')
diff --git a/rpython/jit/backend/x86/rx86.py b/rpython/jit/backend/x86/rx86.py
--- a/rpython/jit/backend/x86/rx86.py
+++ b/rpython/jit/backend/x86/rx86.py
@@ -558,6 +558,9 @@
     DIV_r = insn(rex_w, '\xF7', register(1), '\xF0')
     IDIV_r = insn(rex_w, '\xF7', register(1), '\xF8')
 
+    MUL_r = insn(rex_w, '\xF7', orbyte(4<<3), register(1), '\xC0')
+    MUL_b = insn(rex_w, '\xF7', orbyte(4<<3), stack_bp(1))
+
     IMUL_rr = insn(rex_w, '\x0F\xAF', register(1, 8), register(2), '\xC0')
     IMUL_rb = insn(rex_w, '\x0F\xAF', register(1, 8), stack_bp(2))
 
diff --git a/rpython/jit/codewriter/call.py b/rpython/jit/codewriter/call.py
--- a/rpython/jit/codewriter/call.py
+++ b/rpython/jit/codewriter/call.py
@@ -14,6 +14,7 @@
 from rpython.translator.backendopt.canraise import RaiseAnalyzer
 from rpython.translator.backendopt.writeanalyze import ReadWriteAnalyzer
 from rpython.translator.backendopt.graphanalyze import DependencyTracker
+from rpython.translator.backendopt.collectanalyze import CollectAnalyzer
 
 
 class CallControl(object):
@@ -37,9 +38,9 @@
             self.virtualizable_analyzer = VirtualizableAnalyzer(translator)
             self.quasiimmut_analyzer = QuasiImmutAnalyzer(translator)
             self.randomeffects_analyzer = RandomEffectsAnalyzer(translator)
-            self.seen = DependencyTracker(self.readwrite_analyzer)
-        else:
-            self.seen = None
+            self.collect_analyzer = CollectAnalyzer(translator)
+            self.seen_rw = DependencyTracker(self.readwrite_analyzer)
+            self.seen_gc = DependencyTracker(self.collect_analyzer)
         #
         for index, jd in enumerate(jitdrivers_sd):
             jd.index = index
@@ -294,9 +295,9 @@
                     "but the function has no result" % (op, ))
         #
         effectinfo = effectinfo_from_writeanalyze(
-            self.readwrite_analyzer.analyze(op, self.seen), self.cpu,
+            self.readwrite_analyzer.analyze(op, self.seen_rw), self.cpu,
             extraeffect, oopspecindex, can_invalidate, call_release_gil_target,
-            extradescr,
+            extradescr, self.collect_analyzer.analyze(op, self.seen_gc),
         )
         #
         assert effectinfo is not None
diff --git a/rpython/jit/codewriter/effectinfo.py b/rpython/jit/codewriter/effectinfo.py
--- a/rpython/jit/codewriter/effectinfo.py
+++ b/rpython/jit/codewriter/effectinfo.py
@@ -28,6 +28,11 @@
     OS_THREADLOCALREF_GET       = 5    # llop.threadlocalref_get
     OS_NOT_IN_TRACE             = 8    # for calls not recorded in the jit trace
     #
+    OS_INT_PY_DIV               = 12   # python signed division (neg. corrected)
+    OS_INT_UDIV                 = 13   # regular unsigned division
+    OS_INT_PY_MOD               = 14   # python signed modulo (neg. corrected)
+    OS_INT_UMOD                 = 15   # regular unsigned modulo
+    #
     OS_STR_CONCAT               = 22   # "stroruni.concat"
     OS_STR_SLICE                = 23   # "stroruni.slice"
     OS_STR_EQUAL                = 24   # "stroruni.equal"
@@ -111,7 +116,8 @@
                 oopspecindex=OS_NONE,
                 can_invalidate=False,
                 call_release_gil_target=_NO_CALL_RELEASE_GIL_TARGET,
-                extradescrs=None):
+                extradescrs=None,
+                can_collect=True):
         readonly_descrs_fields = frozenset_or_none(readonly_descrs_fields)
         readonly_descrs_arrays = frozenset_or_none(readonly_descrs_arrays)
         readonly_descrs_interiorfields = frozenset_or_none(
@@ -128,7 +134,8 @@
                write_descrs_interiorfields,
                extraeffect,
                oopspecindex,
-               can_invalidate)
+               can_invalidate,
+               can_collect)
         tgt_func, tgt_saveerr = call_release_gil_target
         if tgt_func:
             key += (object(),)    # don't care about caching in this case
@@ -179,6 +186,7 @@
         #
         result.extraeffect = extraeffect
         result.can_invalidate = can_invalidate
+        result.can_collect = can_collect
         result.oopspecindex = oopspecindex
         result.extradescrs = extradescrs