[pypy-commit] pypy py3.5: hg merge py3k

Thu Sep 22 05:30:31 EDT 2016

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r87309:7f7862ca4a74
Date: 2016-09-22 11:29 +0200
http://bitbucket.org/pypy/pypy/changeset/7f7862ca4a74/

Log:	hg merge py3k

diff too long, truncating to 2000 out of 3083 lines

diff --git a/lib-python/2.7/distutils/sysconfig_pypy.py b/lib-python/2.7/distutils/sysconfig_pypy.py
--- a/lib-python/2.7/distutils/sysconfig_pypy.py
+++ b/lib-python/2.7/distutils/sysconfig_pypy.py
@@ -13,6 +13,7 @@
 import sys
 import os
 import shlex
+import imp
 
 from distutils.errors import DistutilsPlatformError
 
@@ -62,8 +63,7 @@
     """Initialize the module as appropriate for POSIX systems."""
     g = {}
     g['EXE'] = ""
-    g['SO'] = ".so"
-    g['SOABI'] = g['SO'].rsplit('.')[0]
+    g['SO'] = [s[0] for s in imp.get_suffixes() if s[2] == imp.C_EXTENSION][0]
     g['LIBDIR'] = os.path.join(sys.prefix, 'lib')
     g['CC'] = "gcc -pthread" # -pthread might not be valid on OS/X, check
 
@@ -75,8 +75,7 @@
     """Initialize the module as appropriate for NT"""
     g = {}
     g['EXE'] = ".exe"
-    g['SO'] = ".pyd"
-    g['SOABI'] = g['SO'].rsplit('.')[0]
+    g['SO'] = [s[0] for s in imp.get_suffixes() if s[2] == imp.C_EXTENSION][0]
 
     global _config_vars
     _config_vars = g
diff --git a/lib-python/2.7/sysconfig.py b/lib-python/2.7/sysconfig.py
--- a/lib-python/2.7/sysconfig.py
+++ b/lib-python/2.7/sysconfig.py
@@ -529,7 +529,7 @@
         for suffix, mode, type_ in imp.get_suffixes():
             if type_ == imp.C_EXTENSION:
                 _CONFIG_VARS['SOABI'] = suffix.split('.')[1]
-                break
+                break        
 
     if args:
         vals = []
diff --git a/lib_pypy/_pypy_winbase_build.py b/lib_pypy/_pypy_winbase_build.py
--- a/lib_pypy/_pypy_winbase_build.py
+++ b/lib_pypy/_pypy_winbase_build.py
@@ -83,6 +83,7 @@
 BOOL WINAPI GetExitCodeProcess(HANDLE, LPDWORD);
 BOOL WINAPI TerminateProcess(HANDLE, UINT);
 HANDLE WINAPI GetStdHandle(DWORD);
+DWORD WINAPI GetModuleFileNameW(HANDLE, wchar_t *, DWORD);
 """)
 
 # --------------------
diff --git a/lib_pypy/_winapi.py b/lib_pypy/_winapi.py
--- a/lib_pypy/_winapi.py
+++ b/lib_pypy/_winapi.py
@@ -162,18 +162,19 @@
         return int(_ffi.cast("intptr_t", res))
 
 def CloseHandle(handle):
-    res = _CloseHandle(handle)
+    res = _kernel32.CloseHandle(_ffi.cast("HANDLE", handle))
 
     if not res:
         raise _WinError()
 
 def GetModuleFileName(module):
-    buf = ctypes.create_unicode_buffer(_MAX_PATH)
-    res = _GetModuleFileNameW(module, buf, _MAX_PATH)
+    buf = _ffi.new("wchar_t[]", _MAX_PATH)
+    res = _kernel32.GetModuleFileNameW(_ffi.cast("HANDLE", module),
+                                       buf, _MAX_PATH)
 
     if not res:
         raise _WinError()
-    return buf.value
+    return _ffi.string(buf)
 
 STD_INPUT_HANDLE = -10
 STD_OUTPUT_HANDLE = -11
diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO
--- a/lib_pypy/cffi.egg-info/PKG-INFO
+++ b/lib_pypy/cffi.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: cffi
-Version: 1.8.2
+Version: 1.8.4
 Summary: Foreign Function Interface for Python calling C code.
 Home-page: http://cffi.readthedocs.org
 Author: Armin Rigo, Maciej Fijalkowski
diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py
--- a/lib_pypy/cffi/__init__.py
+++ b/lib_pypy/cffi/__init__.py
@@ -4,8 +4,8 @@
 from .api import FFI, CDefError, FFIError
 from .ffiplatform import VerificationError, VerificationMissing
 
-__version__ = "1.8.2"
-__version_info__ = (1, 8, 2)
+__version__ = "1.8.4"
+__version_info__ = (1, 8, 4)
 
 # The verifier module file names are based on the CRC32 of a string that
 # contains the following version number.  It may be older than __version__
diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h
--- a/lib_pypy/cffi/_embedding.h
+++ b/lib_pypy/cffi/_embedding.h
@@ -233,7 +233,7 @@
         f = PySys_GetObject((char *)"stderr");
         if (f != NULL && f != Py_None) {
             PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME
-                               "\ncompiled with cffi version: 1.8.2"
+                               "\ncompiled with cffi version: 1.8.4"
                                "\n_cffi_backend module: ", f);
             modules = PyImport_GetModuleDict();
             mod = PyDict_GetItemString(modules, "_cffi_backend");
diff --git a/lib_pypy/cffi/cparser.py b/lib_pypy/cffi/cparser.py
--- a/lib_pypy/cffi/cparser.py
+++ b/lib_pypy/cffi/cparser.py
@@ -332,7 +332,7 @@
                         realtype = model.unknown_ptr_type(decl.name)
                     else:
                         realtype, quals = self._get_type_and_quals(
-                            decl.type, name=decl.name)
+                            decl.type, name=decl.name, partial_length_ok=True)
                     self._declare('typedef ' + decl.name, realtype, quals=quals)
                 else:
                     raise api.CDefError("unrecognized construct", decl)
@@ -781,11 +781,14 @@
                 exprnode.name in self._int_constants):
             return self._int_constants[exprnode.name]
         #
-        if partial_length_ok:
-            if (isinstance(exprnode, pycparser.c_ast.ID) and
+        if (isinstance(exprnode, pycparser.c_ast.ID) and
                     exprnode.name == '__dotdotdotarray__'):
+            if partial_length_ok:
                 self._partial_length = True
                 return '...'
+            raise api.FFIError(":%d: unsupported '[...]' here, cannot derive "
+                               "the actual array length in this context"
+                               % exprnode.coord.line)
         #
         raise api.FFIError(":%d: unsupported expression: expected a "
                            "simple numeric constant" % exprnode.coord.line)
diff --git a/lib_pypy/cffi/recompiler.py b/lib_pypy/cffi/recompiler.py
--- a/lib_pypy/cffi/recompiler.py
+++ b/lib_pypy/cffi/recompiler.py
@@ -587,8 +587,11 @@
     # ----------
     # typedefs
 
+    def _typedef_type(self, tp, name):
+        return self._global_type(tp, "(*(%s *)0)" % (name,))
+
     def _generate_cpy_typedef_collecttype(self, tp, name):
-        self._do_collect_type(tp)
+        self._do_collect_type(self._typedef_type(tp, name))
 
     def _generate_cpy_typedef_decl(self, tp, name):
         pass
@@ -598,6 +601,7 @@
         self._lsts["typename"].append(TypenameExpr(name, type_index))
 
     def _generate_cpy_typedef_ctx(self, tp, name):
+        tp = self._typedef_type(tp, name)
         self._typedef_ctx(tp, name)
         if getattr(tp, "origin", None) == "unknown_type":
             self._struct_ctx(tp, tp.name, approxname=None)
diff --git a/pypy/doc/config/translation.profopt.txt b/pypy/doc/config/translation.profopt.txt
--- a/pypy/doc/config/translation.profopt.txt
+++ b/pypy/doc/config/translation.profopt.txt
@@ -3,3 +3,14 @@
 RPython program) to gather profile data. Example for pypy-c: "-c 'from
 richards import main;main(); from test import pystone;
 pystone.main()'"
+
+NOTE: be aware of what this does in JIT-enabled executables.  What it
+does is instrument and later optimize the C code that happens to run in
+the example you specify, ignoring any execution of the JIT-generated
+assembler.  That means that you have to choose the example wisely.  If
+it is something that will just generate assembler and stay there, there
+is little value.  If it is something that exercises heavily library
+routines that are anyway written in C, then it will optimize that.  Most
+interesting would be something that causes a lot of JIT-compilation,
+like running a medium-sized test suite several times in a row, in order
+to optimize the warm-up in general.
diff --git a/pypy/doc/cpython_differences.rst b/pypy/doc/cpython_differences.rst
--- a/pypy/doc/cpython_differences.rst
+++ b/pypy/doc/cpython_differences.rst
@@ -449,6 +449,27 @@
   support (see ``multiline_input()``).  On the other hand,
   ``parse_and_bind()`` calls are ignored (issue `#2072`_).
 
+* ``sys.getsizeof()`` always raises ``TypeError``.  This is because a
+  memory profiler using this function is most likely to give results
+  inconsistent with reality on PyPy.  It would be possible to have
+  ``sys.getsizeof()`` return a number (with enough work), but that may
+  or may not represent how much memory the object uses.  It doesn't even
+  make really sense to ask how much *one* object uses, in isolation with
+  the rest of the system.  For example, instances have maps, which are
+  often shared across many instances; in this case the maps would
+  probably be ignored by an implementation of ``sys.getsizeof()``, but
+  their overhead is important in some cases if they are many instances
+  with unique maps.  Conversely, equal strings may share their internal
+  string data even if they are different objects---or empty containers
+  may share parts of their internals as long as they are empty.  Even
+  stranger, some lists create objects as you read them; if you try to
+  estimate the size in memory of ``range(10**6)`` as the sum of all
+  items' size, that operation will by itself create one million integer
+  objects that never existed in the first place.  Note that some of
+  these concerns also exist on CPython, just less so.  For this reason
+  we explicitly don't implement ``sys.getsizeof()``.
+
+
 .. _`is ignored in PyPy`: http://bugs.python.org/issue14621
 .. _`little point`: http://events.ccc.de/congress/2012/Fahrplan/events/5152.en.html
 .. _`#2072`: https://bitbucket.org/pypy/pypy/issue/2072/
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -16,3 +16,8 @@
 Improve merging of virtual states in the JIT in order to avoid jumping to the
 preamble. Accomplished by allocating virtual objects where non-virtuals are
 expected.
+
+.. branch: conditional_call_value_3
+JIT residual calls: if the called function starts with a fast-path
+like "if x.foo != 0: return x.foo", then inline the check before
+doing the CALL.  For now, string hashing is about the only case.
diff --git a/pypy/goal/targetpypystandalone.py b/pypy/goal/targetpypystandalone.py
--- a/pypy/goal/targetpypystandalone.py
+++ b/pypy/goal/targetpypystandalone.py
@@ -246,6 +246,10 @@
                 raise Exception("Cannot use the --output option with PyPy "
                                 "when --shared is on (it is by default). "
                                 "See issue #1971.")
+            if config.translation.profopt is not None:
+                raise Exception("Cannot use the --profopt option "
+                                "when --shared is on (it is by default). "
+                                "See issue #2398.")
         if sys.platform == 'win32':
             libdir = thisdir.join('..', '..', 'libs')
             libdir.ensure(dir=1)
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1961,12 +1961,10 @@
     'NotImplementedError',
     'OSError',
     'OverflowError',
-    'PendingDeprecationWarning',
     'ReferenceError',
     'ResourceWarning',
     'RecursionError',
     'RuntimeError',
-    'RuntimeWarning',
     'StopIteration',
     'SyntaxError',
     'SyntaxWarning',
@@ -1980,10 +1978,12 @@
     'UnicodeError',
     'UnicodeTranslateError',
     'UnicodeWarning',
-    'UserWarning',
     'ValueError',
     'Warning',
-    'ZeroDivisionError'
+    'ZeroDivisionError',
+    'RuntimeWarning',
+    'PendingDeprecationWarning',
+    'UserWarning',
 ]
 
 if sys.platform.startswith("win"):
diff --git a/pypy/interpreter/test/test_generator.py b/pypy/interpreter/test/test_generator.py
--- a/pypy/interpreter/test/test_generator.py
+++ b/pypy/interpreter/test/test_generator.py
@@ -57,12 +57,14 @@
         def f():
             yield 2
         g = f()
+        # two arguments version
         raises(NameError, g.throw, NameError, "Error")
 
     def test_throw2(self):
         def f():
             yield 2
         g = f()
+        # single argument version
         raises(NameError, g.throw, NameError("Error"))
 
     def test_throw3(self):
@@ -236,7 +238,8 @@
         def f():
             yield 1
         g = f()
-        raises(TypeError, g.send, 1)
+        raises(TypeError, g.send)     # one argument required
+        raises(TypeError, g.send, 1)  # not started, must send None
 
     def test_generator_explicit_stopiteration(self):
         def f():
diff --git a/pypy/interpreter/test/test_pyframe.py b/pypy/interpreter/test/test_pyframe.py
--- a/pypy/interpreter/test/test_pyframe.py
+++ b/pypy/interpreter/test/test_pyframe.py
@@ -583,3 +583,21 @@
         assert a2ref() is None, "stack not cleared"
         #
         raises(StopIteration, next, gen)
+
+    def test_throw_trace_bug(self):
+        import sys
+        def f():
+            yield 5
+        gen = f()
+        assert next(gen) == 5
+        seen = []
+        def trace_func(frame, event, *args):
+            seen.append(event)
+            return trace_func
+        sys.settrace(trace_func)
+        try:
+            gen.throw(ValueError)
+        except ValueError:
+            pass
+        sys.settrace(None)
+        assert seen == ['call', 'exception', 'return']
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -3,7 +3,7 @@
 from rpython.rlib import rdynload, clibffi, entrypoint
 from rpython.rtyper.lltypesystem import rffi
 
-VERSION = "1.8.2"
+VERSION = "1.8.4"
 
 FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI
 try:
diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py
--- a/pypy/module/_cffi_backend/ctypearray.py
+++ b/pypy/module/_cffi_backend/ctypearray.py
@@ -11,7 +11,7 @@
 from rpython.rlib.rarithmetic import ovfcheck
 
 from pypy.module._cffi_backend import cdataobj
-from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray
+from pypy.module._cffi_backend.ctypeptr import W_CTypePtrOrArray, W_CTypePointer
 from pypy.module._cffi_backend import ctypeprim
 
 
@@ -22,6 +22,7 @@
     is_nonfunc_pointer_or_array = True
 
     def __init__(self, space, ctptr, length, arraysize, extra):
+        assert isinstance(ctptr, W_CTypePointer)
         W_CTypePtrOrArray.__init__(self, space, arraysize, extra, 0,
                                    ctptr.ctitem)
         self.length = length
diff --git a/pypy/module/_cffi_backend/ctypefunc.py b/pypy/module/_cffi_backend/ctypefunc.py
--- a/pypy/module/_cffi_backend/ctypefunc.py
+++ b/pypy/module/_cffi_backend/ctypefunc.py
@@ -35,8 +35,7 @@
         assert isinstance(ellipsis, bool)
         extra, xpos = self._compute_extra_text(fargs, fresult, ellipsis, abi)
         size = rffi.sizeof(rffi.VOIDP)
-        W_CTypePtrBase.__init__(self, space, size, extra, xpos, fresult,
-                                could_cast_anything=False)
+        W_CTypePtrBase.__init__(self, space, size, extra, xpos, fresult)
         self.fargs = fargs
         self.ellipsis = ellipsis
         self.abi = abi
@@ -59,6 +58,16 @@
                     lltype.free(self.cif_descr, flavor='raw')
                     self.cif_descr = lltype.nullptr(CIF_DESCRIPTION)
 
+    def is_unichar_ptr_or_array(self):
+        return False
+
+    def is_char_or_unichar_ptr_or_array(self):
+        return False
+
+    def string(self, cdataobj, maxlen):
+        # Can't use ffi.string() on a function pointer
+        return W_CType.string(self, cdataobj, maxlen)
+
     def new_ctypefunc_completing_argtypes(self, args_w):
         space = self.space
         nargs_declared = len(self.fargs)
diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py
--- a/pypy/module/_cffi_backend/ctypeobj.py
+++ b/pypy/module/_cffi_backend/ctypeobj.py
@@ -19,7 +19,6 @@
     # XXX this could be improved with an elidable method get_size()
     # that raises in case it's still -1...
 
-    cast_anything = False
     is_primitive_integer = False
     is_nonfunc_pointer_or_array = False
     is_indirect_arg_for_call_python = False
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -120,7 +120,6 @@
 
 class W_CTypePrimitiveChar(W_CTypePrimitiveCharOrUniChar):
     _attrs_ = []
-    cast_anything = True
 
     def cast_to_int(self, cdata):
         return self.space.wrap(ord(cdata[0]))
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -16,12 +16,11 @@
 
 
 class W_CTypePtrOrArray(W_CType):
-    _attrs_            = ['ctitem', 'can_cast_anything', 'accept_str', 'length']
-    _immutable_fields_ = ['ctitem', 'can_cast_anything', 'accept_str', 'length']
+    _attrs_            = ['ctitem', 'accept_str', 'length']
+    _immutable_fields_ = ['ctitem', 'accept_str', 'length']
     length = -1
 
-    def __init__(self, space, size, extra, extra_position, ctitem,
-                 could_cast_anything=True):
+    def __init__(self, space, size, extra, extra_position, ctitem):
         name, name_position = ctitem.insert_name(extra, extra_position)
         W_CType.__init__(self, space, size, name, name_position)
         # this is the "underlying type":
@@ -29,10 +28,11 @@
         #  - for arrays, it is the array item type
         #  - for functions, it is the return type
         self.ctitem = ctitem
-        self.can_cast_anything = could_cast_anything and ctitem.cast_anything
-        self.accept_str = (self.can_cast_anything or
-                            (ctitem.is_primitive_integer and
-                             ctitem.size == rffi.sizeof(lltype.Char)))
+        self.accept_str = (self.is_nonfunc_pointer_or_array and
+                (isinstance(ctitem, ctypevoid.W_CTypeVoid) or
+                 isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar) or
+                 (ctitem.is_primitive_integer and
+                  ctitem.size == rffi.sizeof(lltype.Char))))
 
     def is_unichar_ptr_or_array(self):
         return isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveUniChar)
@@ -139,7 +139,10 @@
 
 class W_CTypePtrBase(W_CTypePtrOrArray):
     # base class for both pointers and pointers-to-functions
-    _attrs_ = []
+    _attrs_ = ['is_void_ptr', 'is_voidchar_ptr']
+    _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr']
+    is_void_ptr = False
+    is_voidchar_ptr = False
 
     def convert_to_object(self, cdata):
         ptrdata = rffi.cast(rffi.CCHARPP, cdata)[0]
@@ -156,7 +159,16 @@
             else:
                 raise self._convert_error("compatible pointer", w_ob)
         if self is not other:
-            if not (self.can_cast_anything or other.can_cast_anything):
+            if self.is_void_ptr or other.is_void_ptr:
+                pass     # cast from or to 'void *'
+            elif self.is_voidchar_ptr or other.is_voidchar_ptr:
+                space = self.space
+                msg = ("implicit cast from '%s' to '%s' "
+                    "will be forbidden in the future (check that the types "
+                    "are as you expect; use an explicit ffi.cast() if they "
+                    "are correct)" % (other.name, self.name))
+                space.warn(space.wrap(msg), space.w_UserWarning, stacklevel=1)
+            else:
                 raise self._convert_error("compatible pointer", w_ob)
 
         rffi.cast(rffi.CCHARPP, cdata)[0] = w_ob.unsafe_escaping_ptr()
@@ -167,8 +179,8 @@
 
 
 class W_CTypePointer(W_CTypePtrBase):
-    _attrs_ = ['is_file', 'cache_array_type', 'is_void_ptr', '_array_types']
-    _immutable_fields_ = ['is_file', 'cache_array_type?', 'is_void_ptr']
+    _attrs_ = ['is_file', 'cache_array_type', '_array_types']
+    _immutable_fields_ = ['is_file', 'cache_array_type?']
     kind = "pointer"
     cache_array_type = None
     is_nonfunc_pointer_or_array = True
@@ -183,6 +195,8 @@
         self.is_file = (ctitem.name == "struct _IO_FILE" or
                         ctitem.name == "FILE")
         self.is_void_ptr = isinstance(ctitem, ctypevoid.W_CTypeVoid)
+        self.is_voidchar_ptr = (self.is_void_ptr or
+                           isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar))
         W_CTypePtrBase.__init__(self, space, size, extra, 2, ctitem)
 
     def newp(self, w_init, allocator):
diff --git a/pypy/module/_cffi_backend/ctypevoid.py b/pypy/module/_cffi_backend/ctypevoid.py
--- a/pypy/module/_cffi_backend/ctypevoid.py
+++ b/pypy/module/_cffi_backend/ctypevoid.py
@@ -7,7 +7,6 @@
 
 class W_CTypeVoid(W_CType):
     _attrs_ = []
-    cast_anything = True
     kind = "void"
 
     def __init__(self, space):
diff --git a/pypy/module/_cffi_backend/handle.py b/pypy/module/_cffi_backend/handle.py
--- a/pypy/module/_cffi_backend/handle.py
+++ b/pypy/module/_cffi_backend/handle.py
@@ -32,8 +32,8 @@
 @unwrap_spec(w_cdata=cdataobj.W_CData)
 def from_handle(space, w_cdata):
     ctype = w_cdata.ctype
-    if (not isinstance(ctype, ctypeptr.W_CTypePtrOrArray) or
-        not ctype.can_cast_anything):
+    if (not isinstance(ctype, ctypeptr.W_CTypePointer) or
+        not ctype.is_voidchar_ptr):
         raise oefmt(space.w_TypeError,
                     "expected a 'cdata' object with a 'void *' out of "
                     "new_handle(), got '%s'", ctype.name)
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -1,7 +1,7 @@
 # ____________________________________________________________
 
 import sys
-assert __version__ == "1.8.2", ("This test_c.py file is for testing a version"
+assert __version__ == "1.8.4", ("This test_c.py file is for testing a version"
                                 " of cffi that differs from the one that we"
                                 " get from 'import _cffi_backend'")
 if sys.version_info < (3,):
@@ -3665,3 +3665,27 @@
     check_dir(pp, [])
     check_dir(pp[0], ['a1', 'a2'])
     check_dir(pp[0][0], ['a1', 'a2'])
+
+def test_char_pointer_conversion():
+    import warnings
+    assert __version__.startswith(("1.8", "1.9")), (
+        "consider turning the warning into an error")
+    BCharP = new_pointer_type(new_primitive_type("char"))
+    BIntP = new_pointer_type(new_primitive_type("int"))
+    BVoidP = new_pointer_type(new_void_type())
+    z1 = cast(BCharP, 0)
+    z2 = cast(BIntP, 0)
+    z3 = cast(BVoidP, 0)
+    with warnings.catch_warnings(record=True) as w:
+        newp(new_pointer_type(BIntP), z1)    # warn
+        assert len(w) == 1
+        newp(new_pointer_type(BVoidP), z1)   # fine
+        assert len(w) == 1
+        newp(new_pointer_type(BCharP), z2)   # warn
+        assert len(w) == 2
+        newp(new_pointer_type(BVoidP), z2)   # fine
+        assert len(w) == 2
+        newp(new_pointer_type(BCharP), z3)   # fine
+        assert len(w) == 2
+        newp(new_pointer_type(BIntP), z3)    # fine
+        assert len(w) == 2
diff --git a/pypy/module/_cffi_backend/test/test_ffi_obj.py b/pypy/module/_cffi_backend/test/test_ffi_obj.py
--- a/pypy/module/_cffi_backend/test/test_ffi_obj.py
+++ b/pypy/module/_cffi_backend/test/test_ffi_obj.py
@@ -503,3 +503,15 @@
         assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
         p = ffi.new("int[]", [-123456789])
         assert ffi.unpack(p, 1) == [-123456789]
+
+    def test_bug_1(self):
+        import _cffi_backend as _cffi1_backend
+        ffi = _cffi1_backend.FFI()
+        q = ffi.new("char[]", b"abcd")
+        p = ffi.cast("char(*)(void)", q)
+        raises(TypeError, ffi.string, p)
+
+    def test_negative_array_size(self):
+        import _cffi_backend as _cffi1_backend
+        ffi = _cffi1_backend.FFI()
+        raises(ffi.error, ffi.cast, "int[-5]", 0)
diff --git a/pypy/module/_cffi_backend/test/test_recompiler.py b/pypy/module/_cffi_backend/test/test_recompiler.py
--- a/pypy/module/_cffi_backend/test/test_recompiler.py
+++ b/pypy/module/_cffi_backend/test/test_recompiler.py
@@ -8,7 +8,7 @@
 
 @unwrap_spec(cdef=str, module_name=str, source=str)
 def prepare(space, cdef, module_name, source, w_includes=None,
-            w_extra_source=None):
+            w_extra_source=None, w_min_version=None):
     try:
         import cffi
         from cffi import FFI            # <== the system one, which
@@ -16,8 +16,13 @@
         from cffi import ffiplatform
     except ImportError:
         py.test.skip("system cffi module not found or older than 1.0.0")
-    if cffi.__version_info__ < (1, 4, 0):
-        py.test.skip("system cffi module needs to be at least 1.4.0")
+    if w_min_version is None:
+        min_version = (1, 4, 0)
+    else:
+        min_version = tuple(space.unwrap(w_min_version))
+    if cffi.__version_info__ < min_version:
+        py.test.skip("system cffi module needs to be at least %s, got %s" % (
+            min_version, cffi.__version_info__))
     space.appexec([], """():
         import _cffi_backend     # force it to be initialized
     """)
@@ -1790,3 +1795,28 @@
                                 "void f(void) { }")
         assert lib.f.__get__(42) is lib.f
         assert lib.f.__get__(42, int) is lib.f
+
+    def test_typedef_array_dotdotdot(self):
+        ffi, lib = self.prepare("""
+            typedef int foo_t[...], bar_t[...];
+            int gv[...];
+            typedef int mat_t[...][...];
+            typedef int vmat_t[][...];
+            """,
+            "test_typedef_array_dotdotdot", """
+            typedef int foo_t[50], bar_t[50];
+            int gv[23];
+            typedef int mat_t[6][7];
+            typedef int vmat_t[][8];
+        """, min_version=(1, 8, 4))
+        assert ffi.sizeof("foo_t") == 50 * ffi.sizeof("int")
+        assert ffi.sizeof("bar_t") == 50 * ffi.sizeof("int")
+        assert len(ffi.new("foo_t")) == 50
+        assert len(ffi.new("bar_t")) == 50
+        assert ffi.sizeof(lib.gv) == 23 * ffi.sizeof("int")
+        assert ffi.sizeof("mat_t") == 6 * 7 * ffi.sizeof("int")
+        assert len(ffi.new("mat_t")) == 6
+        assert len(ffi.new("mat_t")[3]) == 7
+        raises(ffi.error, ffi.sizeof, "vmat_t")
+        p = ffi.new("vmat_t", 4)
+        assert ffi.sizeof(p[3]) == 8 * ffi.sizeof("int")
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -1126,8 +1126,8 @@
         a[3] = b'x'
         b = memoryview(a)
         assert len(b) == 10
-        assert b[3] == b'x'
-        b[6] = b'y'
+        assert b[3] == ord(b'x')
+        b[6] = ord(b'y')
         assert a[6] == b'y'
         b[3:5] = b'zt'
         assert a[3] == b'z'
@@ -1135,9 +1135,9 @@
 
         b = memoryview(a)
         assert len(b) == 10
-        assert b[3] == b'z'
-        b[3] = b'x'
-        assert b[3] == b'x'
+        assert b[3] == ord(b'z')
+        b[3] = ord(b'x')
+        assert b[3] == ord(b'x')
 
     def test_pypy_raw_address(self):
         import _rawffi
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -33,28 +33,27 @@
         if typecode == tc:
             a = space.allocate_instance(types[tc].w_class, w_cls)
             a.__init__(space)
-
-            if len(__args__.arguments_w) > 0:
-                w_initializer = __args__.arguments_w[0]
-                if isinstance(w_initializer, W_ArrayBase):
-                    a.extend(w_initializer, True)
-                elif space.type(w_initializer) is space.w_list:
-                    a.descr_fromlist(space, w_initializer)
-                else:
-                    try:
-                        buf = space.bufferstr_w(w_initializer)
-                    except OperationError as e:
-                        if not e.match(space, space.w_TypeError):
-                            raise
-                        a.extend(w_initializer, True)
-                    else:
-                        a.descr_frombytes(space, buf)
             break
     else:
         raise oefmt(space.w_ValueError,
                     "bad typecode (must be b, B, u, h, H, i, I, l, L, q, Q, f "
                     "or d)")
 
+    if len(__args__.arguments_w) > 0:
+        w_initializer = __args__.arguments_w[0]
+        if isinstance(w_initializer, W_ArrayBase):
+            a.extend(w_initializer, True)
+        elif space.type(w_initializer) is space.w_list:
+            a.descr_fromlist(space, w_initializer)
+        else:
+            try:
+                buf = space.bufferstr_w(w_initializer)
+            except OperationError as e:
+                if not e.match(space, space.w_TypeError):
+                    raise
+                a.extend(w_initializer, True)
+            else:
+                a.descr_frombytes(space, buf)
     return a
 
 
diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py
--- a/pypy/module/cpyext/longobject.py
+++ b/pypy/module/cpyext/longobject.py
@@ -5,7 +5,6 @@
 from pypy.objspace.std.longobject import W_LongObject
 from pypy.interpreter.error import OperationError
 from rpython.rlib.rbigint import rbigint
-from rpython.rlib.rarithmetic import intmask
 
 
 PyLong_Check, PyLong_CheckExact = build_type_checkers("Long", "w_int")
@@ -27,25 +26,25 @@
     """Return a new PyLongObject object from a C size_t, or NULL on
     failure.
     """
-    return space.wrap(val)
+    return space.newlong_from_rarith_int(val)
 
 @cpython_api([rffi.LONGLONG], PyObject)
 def PyLong_FromLongLong(space, val):
     """Return a new PyLongObject object from a C long long, or NULL
     on failure."""
-    return space.wrap(val)
+    return space.newlong_from_rarith_int(val)
 
 @cpython_api([rffi.ULONG], PyObject)
 def PyLong_FromUnsignedLong(space, val):
     """Return a new PyLongObject object from a C unsigned long, or
     NULL on failure."""
-    return space.wrap(val)
+    return space.newlong_from_rarith_int(val)
 
 @cpython_api([rffi.ULONGLONG], PyObject)
 def PyLong_FromUnsignedLongLong(space, val):
     """Return a new PyLongObject object from a C unsigned long long,
     or NULL on failure."""
-    return space.wrap(val)
+    return space.newlong_from_rarith_int(val)
 
 @cpython_api([PyObject], rffi.ULONG, error=-1)
 def PyLong_AsUnsignedLong(space, w_long):
@@ -212,7 +211,10 @@
     can be retrieved from the resulting value using PyLong_AsVoidPtr().
 
     If the integer is larger than LONG_MAX, a positive long integer is returned."""
-    return space.wrap(rffi.cast(ADDR, p))
+    value = rffi.cast(ADDR, p)    # signed integer
+    if value < 0:
+        return space.newlong_from_rarith_int(rffi.cast(lltype.Unsigned, p))
+    return space.wrap(value)
 
 @cpython_api([PyObject], rffi.VOIDP, error=lltype.nullptr(rffi.VOIDP.TO))
 def PyLong_AsVoidPtr(space, w_long):
diff --git a/pypy/module/cpyext/pytraceback.py b/pypy/module/cpyext/pytraceback.py
--- a/pypy/module/cpyext/pytraceback.py
+++ b/pypy/module/cpyext/pytraceback.py
@@ -5,7 +5,6 @@
 from pypy.module.cpyext.pyobject import (
     PyObject, make_ref, from_ref, Py_DecRef, make_typedescr)
 from pypy.module.cpyext.frameobject import PyFrameObject
-from rpython.rlib.unroll import unrolling_iterable
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.pytraceback import PyTraceback
 from pypy.interpreter import pycode
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -1,5 +1,6 @@
 import sys, py
 from rpython.rtyper.lltypesystem import rffi, lltype
+from rpython.rlib.rarithmetic import maxint
 from pypy.objspace.std.longobject import W_LongObject
 from pypy.module.cpyext.test.test_api import BaseApiTest
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
@@ -7,18 +8,20 @@
 
 class TestLongObject(BaseApiTest):
     def test_FromLong(self, space, api):
-        value = api.PyLong_FromLong(3)
-        assert isinstance(value, W_LongObject)
-        assert space.unwrap(value) == 3
+        w_value = api.PyLong_FromLong(3)
+        assert isinstance(w_value, W_LongObject)
+        assert space.unwrap(w_value) == 3
 
-        value = api.PyLong_FromLong(sys.maxint)
-        assert isinstance(value, W_LongObject)
-        assert space.unwrap(value) == sys.maxint
+        w_value = api.PyLong_FromLong(sys.maxint)
+        assert isinstance(w_value, W_LongObject)
+        assert space.unwrap(w_value) == sys.maxint
 
     def test_aslong(self, space, api):
         w_value = api.PyLong_FromLong((sys.maxint - 1) / 2)
+        assert isinstance(w_value, W_LongObject)
 
         w_value = space.mul(w_value, space.wrap(2))
+        assert isinstance(w_value, W_LongObject)
         value = api.PyLong_AsLong(w_value)
         assert value == (sys.maxint - 1)
 
@@ -34,12 +37,16 @@
 
     def test_as_ssize_t(self, space, api):
         w_value = space.newlong(2)
+        assert isinstance(w_value, W_LongObject)
         value = api.PyLong_AsSsize_t(w_value)
         assert value == 2
-        assert space.eq_w(w_value, api.PyLong_FromSsize_t(2))
+        w_val2 = api.PyLong_FromSsize_t(2)
+        assert isinstance(w_val2, W_LongObject)
+        assert space.eq_w(w_value, w_val2)
 
     def test_fromdouble(self, space, api):
         w_value = api.PyLong_FromDouble(-12.74)
+        assert space.isinstance_w(w_value, space.w_int)
         assert space.unwrap(w_value) == -12
         assert api.PyLong_AsDouble(w_value) == -12
 
@@ -102,9 +109,22 @@
 
     def test_as_voidptr(self, space, api):
         w_l = api.PyLong_FromVoidPtr(lltype.nullptr(rffi.VOIDP.TO))
-        assert space.unwrap(w_l) == 0L
+        assert space.is_w(space.type(w_l), space.w_int)
+        assert space.unwrap(w_l) == 0
         assert api.PyLong_AsVoidPtr(w_l) == lltype.nullptr(rffi.VOIDP.TO)
 
+        p = rffi.cast(rffi.VOIDP, maxint)
+        w_l = api.PyLong_FromVoidPtr(p)
+        assert space.is_w(space.type(w_l), space.w_int)
+        assert space.unwrap(w_l) == maxint
+        assert api.PyLong_AsVoidPtr(w_l) == p
+
+        p = rffi.cast(rffi.VOIDP, -maxint-1)
+        w_l = api.PyLong_FromVoidPtr(p)
+        assert space.is_w(space.type(w_l), space.w_int)
+        assert space.unwrap(w_l) == maxint+1
+        assert api.PyLong_AsVoidPtr(w_l) == p
+
     def test_sign_and_bits(self, space, api):
         if space.is_true(space.lt(space.sys.get('version_info'),
                                   space.wrap((2, 7)))):
@@ -133,23 +153,58 @@
         module = self.import_extension('foo', [
             ("from_unsignedlong", "METH_NOARGS",
              """
-                 return PyLong_FromUnsignedLong((unsigned long)-1);
+                 PyObject * obj;
+                 obj = PyLong_FromUnsignedLong((unsigned long)-1);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """)])
         import sys
         assert module.from_unsignedlong() == 2 * sys.maxsize + 1
 
     def test_fromlonglong(self):
         module = self.import_extension('foo', [
-            ("from_longlong", "METH_NOARGS",
+            ("from_longlong", "METH_VARARGS",
              """
-                 return PyLong_FromLongLong((long long)-1);
+                 int val;
+                 PyObject * obj;
+                 if (!PyArg_ParseTuple(args, "i", &val))
+                     return NULL;
+                 obj = PyLong_FromLongLong((long long)val);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """),
-            ("from_unsignedlonglong", "METH_NOARGS",
+            ("from_unsignedlonglong", "METH_VARARGS",
              """
-                 return PyLong_FromUnsignedLongLong((unsigned long long)-1);
+                 int val;
+                 PyObject * obj;
+                 if (!PyArg_ParseTuple(args, "i", &val))
+                     return NULL;
+                 obj = PyLong_FromUnsignedLongLong((long long)val);
+                 if (obj->ob_type != &PyLong_Type)
+                 {
+                    Py_DECREF(obj);
+                    PyErr_SetString(PyExc_ValueError,
+                            "PyLong_FromLongLong did not return PyLongObject");
+                    return NULL;
+                 }
+                 return obj;
              """)])
-        assert module.from_longlong() == -1
-        assert module.from_unsignedlonglong() == (1<<64) - 1
+        assert module.from_longlong(-1) == -1
+        assert module.from_longlong(0) == 0
+        assert module.from_unsignedlonglong(0) == 0
+        assert module.from_unsignedlonglong(-1) == (1<<64) - 1
 
     def test_from_size_t(self):
         module = self.import_extension('foo', [
@@ -228,7 +283,7 @@
                     return PyLong_FromLong(3);
                  if (str + strlen(str) != end)
                     return PyLong_FromLong(4);
-                 return PyLong_FromLong(0); 
+                 return PyLong_FromLong(0);
              """)])
         assert module.from_str() == 0
 
@@ -237,10 +292,15 @@
             ("has_sub", "METH_NOARGS",
              """
                 PyObject *ret, *obj = PyLong_FromLong(42);
-                if (obj->ob_type->tp_as_number->nb_subtract)
-                    ret = obj->ob_type->tp_as_number->nb_subtract(obj, obj);
+                if (obj->ob_type != &PyLong_Type)
+                    ret = PyLong_FromLong(-2);
                 else
-                    ret = PyLong_FromLong(-1);
+                {
+                    if (obj->ob_type->tp_as_number->nb_subtract)
+                        ret = obj->ob_type->tp_as_number->nb_subtract(obj, obj);
+                    else
+                        ret = PyLong_FromLong(-1);
+                }
                 Py_DECREF(obj);
                 return ret;
              """),
@@ -279,4 +339,4 @@
         assert module.has_pow() == 0
         assert module.has_hex() == '0x2aL'
         assert module.has_oct() == '052L'
-                
+
diff --git a/pypy/module/cpyext/test/test_version.py b/pypy/module/cpyext/test/test_version.py
--- a/pypy/module/cpyext/test/test_version.py
+++ b/pypy/module/cpyext/test/test_version.py
@@ -3,7 +3,7 @@
 import py, pytest
 from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
 
-only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names" 
+only_pypy ="config.option.runappdirect and '__pypy__' not in sys.builtin_module_names"
 
 def test_pragma_version():
     from pypy.module.sys.version import CPYTHON_VERSION
@@ -46,10 +46,18 @@
     def test_pypy_versions(self):
         import sys
         init = """
+        static struct PyModuleDef moduledef = {
+                PyModuleDef_HEAD_INIT,
+                "foo",          /* m_name */
+                NULL,           /* m_doc */
+                -1,             /* m_size */
+                NULL            /* m_methods */
+            };
         if (Py_IsInitialized()) {
-            PyObject *m = Py_InitModule("foo", NULL);
+            PyObject *m = PyModule_Create(&moduledef);
             PyModule_AddStringConstant(m, "pypy_version", PYPY_VERSION);
             PyModule_AddIntConstant(m, "pypy_version_num", PYPY_VERSION_NUM);
+            return m;
         }
         Py_RETURN_NONE;
         """
diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py
--- a/pypy/module/posix/app_posix.py
+++ b/pypy/module/posix/app_posix.py
@@ -40,9 +40,6 @@
     st_atime = structseqfield(11, "time of last access")
     st_mtime = structseqfield(12, "time of last modification")
     st_ctime = structseqfield(13, "time of last change")
-    st_atime_ns = structseqfield(14, "time of last access in nanoseconds")
-    st_mtime_ns = structseqfield(15, "time of last modification in nanoseconds")
-    st_ctime_ns = structseqfield(16, "time of last change in nanoseconds")
 
     if "st_blksize" in posix._statfields:
         st_blksize = structseqfield(20, "blocksize for filesystem I/O")
@@ -64,6 +61,21 @@
         if self.st_ctime is None:
             self.__dict__['st_ctime'] = self[9]
 
+    @property
+    def st_atime_ns(self):
+        "time of last access in nanoseconds"
+        return int(self[7]) * 1000000000 + self.nsec_atime
+
+    @property
+    def st_mtime_ns(self):
+        "time of last modification in nanoseconds"
+        return int(self[8]) * 1000000000 + self.nsec_mtime
+
+    @property
+    def st_ctime_ns(self):
+        "time of last change in nanoseconds"
+        return int(self[9]) * 1000000000 + self.nsec_ctime
+
 
 class statvfs_result(metaclass=structseqtype):
 
diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py
--- a/pypy/module/posix/interp_posix.py
+++ b/pypy/module/posix/interp_posix.py
@@ -364,56 +364,50 @@
 
 STAT_FIELDS = unrolling_iterable(enumerate(rposix_stat.STAT_FIELDS))
 
-N_INDEXABLE_FIELDS = 10
-
-def _time_ns_from_float(ftime):
-    "Convert a floating-point time (in seconds) into a (s, ns) pair of ints"
-    fracpart, intpart = modf(ftime)
-    if fracpart < 0:
-        fracpart += 1.
-        intpart -= 1.
-    return int(intpart), int(fracpart * 1e9)
-
- at specialize.arg(4)
-def _fill_time(space, lst, index, w_keywords, attrname, ftime):
-    stat_float_times = space.fromcache(StatState).stat_float_times
-    seconds, fractional_ns = _time_ns_from_float(ftime)
-    lst[index] = space.wrap(seconds)
-    if stat_float_times:
-        space.setitem(w_keywords, space.wrap(attrname), space.wrap(ftime))
-    else:
-        space.setitem(w_keywords, space.wrap(attrname), space.wrap(seconds))
-    w_billion = space.wrap(1000000000)
-    w_total_ns = space.add(space.mul(space.wrap(seconds), w_billion),
-                           space.wrap(fractional_ns))
-    space.setitem(w_keywords, space.wrap(attrname + '_ns'), w_total_ns)
-
-STANDARD_FIELDS = unrolling_iterable(enumerate(rposix_stat.STAT_FIELDS[:7]))
-EXTRA_FIELDS = unrolling_iterable(rposix_stat.STAT_FIELDS[10:])
+STATVFS_FIELDS = unrolling_iterable(enumerate(rposix_stat.STATVFS_FIELDS))
 
 def build_stat_result(space, st):
-    lst = [None] * N_INDEXABLE_FIELDS
+    FIELDS = STAT_FIELDS    # also when not translating at all
+    lst = [None] * rposix_stat.N_INDEXABLE_FIELDS
     w_keywords = space.newdict()
-    for (i, (name, TYPE)) in STANDARD_FIELDS:
-        value = getattr(st, name)
-        w_value = space.wrap(value)
-        lst[i] = w_value
+    stat_float_times = space.fromcache(StatState).stat_float_times
+    for i, (name, TYPE) in FIELDS:
+        if i < rposix_stat.N_INDEXABLE_FIELDS:
+            # get the first 10 items by indexing; this gives us
+            # 'st_Xtime' as an integer, too
+            w_value = space.wrap(st[i])
+            lst[i] = w_value
+        else:
+            try:
+                value = getattr(st, name)
+            except AttributeError:
+                # untranslated, there is no nsec_Xtime attribute
+                assert name.startswith('nsec_')
+                value = rposix_stat.get_stat_ns_as_bigint(st, name[5:])
+                value = value.tolong() % 1000000000
+            w_value = space.wrap(value)
+            space.setitem(w_keywords, space.wrap(name), w_value)
 
-    _fill_time(space, lst, 7, w_keywords, 'st_atime', st.st_atime)
-    _fill_time(space, lst, 8, w_keywords, 'st_mtime', st.st_mtime)
-    _fill_time(space, lst, 9, w_keywords, 'st_ctime', st.st_ctime)
+    # Note: 'w_keywords' contains the three attributes 'nsec_Xtime'.
+    # We have an app-level property in app_posix.stat_result to
+    # compute the full 'st_Xtime_ns' value.
 
-    for name, TYPE in EXTRA_FIELDS:
-        value = getattr(st, name)
-        w_value = space.wrap(value)
-        space.setitem(w_keywords, space.wrap(name), w_value)
+    # non-rounded values for name-based access
+    if stat_float_times:
+        space.setitem(w_keywords,
+                      space.wrap('st_atime'), space.wrap(st.st_atime))
+        space.setitem(w_keywords,
+                      space.wrap('st_mtime'), space.wrap(st.st_mtime))
+        space.setitem(w_keywords,
+                      space.wrap('st_ctime'), space.wrap(st.st_ctime))
+    #else:
+    #   filled by the __init__ method
 
     w_tuple = space.newtuple(lst)
     w_stat_result = space.getattr(space.getbuiltinmodule(os.name),
                                   space.wrap('stat_result'))
     return space.call_function(w_stat_result, w_tuple, w_keywords)
 
-STATVFS_FIELDS = unrolling_iterable(enumerate(rposix_stat.STATVFS_FIELDS))
 
 def build_statvfs_result(space, st):
     vals_w = [None] * len(rposix_stat.STATVFS_FIELDS)
diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py
--- a/pypy/module/posix/test/test_posix2.py
+++ b/pypy/module/posix/test/test_posix2.py
@@ -126,9 +126,9 @@
         assert st[4] == st.st_uid
         assert st[5] == st.st_gid
         assert st[6] == st.st_size
-        assert st[7] == int(st.st_atime)
-        assert st[8] == int(st.st_mtime)
-        assert st[9] == int(st.st_ctime)
+        assert st[7] == int(st.st_atime)   # in complete corner cases, rounding
+        assert st[8] == int(st.st_mtime)   # here could maybe get the wrong
+        assert st[9] == int(st.st_ctime)   # integer...
 
         assert stat.S_IMODE(st.st_mode) & stat.S_IRUSR
         assert stat.S_IMODE(st.st_mode) & stat.S_IWUSR
@@ -138,15 +138,17 @@
         assert st.st_size == 14
         assert st.st_nlink == 1
 
-        #if sys.platform.startswith('linux'):
-        #    # expects non-integer timestamps - it's unlikely that they are
-        #    # all three integers
-        #    assert ((st.st_atime, st.st_mtime, st.st_ctime) !=
-        #            (st[7],       st[8],       st[9]))
-        #    assert st.st_blksize * st.st_blocks >= st.st_size
         if sys.platform.startswith('linux'):
+            assert isinstance(st.st_atime, float)
+            assert isinstance(st.st_mtime, float)
+            assert isinstance(st.st_ctime, float)
             assert hasattr(st, 'st_rdev')
 
+        assert isinstance(st.st_atime_ns, int)
+        assert abs(st.st_atime_ns - 1e9*st.st_atime) < 500
+        assert abs(st.st_mtime_ns - 1e9*st.st_mtime) < 500
+        assert abs(st.st_ctime_ns - 1e9*st.st_ctime) < 500
+
     def test_stat_float_times(self):
         path = self.path
         posix = self.posix
diff --git a/pypy/module/sys/vm.py b/pypy/module/sys/vm.py
--- a/pypy/module/sys/vm.py
+++ b/pypy/module/sys/vm.py
@@ -242,13 +242,33 @@
     from rpython.rtyper.lltypesystem import lltype, rffi
     return space.wrap(rffi.cast(lltype.Signed, handle))
 
+getsizeof_missing = """sys.getsizeof() is not implemented on PyPy.
+
+A memory profiler using this function is most likely to give results
+inconsistent with reality on PyPy.  It would be possible to have
+sys.getsizeof() return a number (with enough work), but that may or
+may not represent how much memory the object uses.  It doesn't even
+make really sense to ask how much *one* object uses, in isolation
+with the rest of the system.  For example, instances have maps,
+which are often shared across many instances; in this case the maps
+would probably be ignored by an implementation of sys.getsizeof(),
+but their overhead is important in some cases if they are many
+instances with unique maps.  Conversely, equal strings may share
+their internal string data even if they are different objects---or
+empty containers may share parts of their internals as long as they
+are empty.  Even stranger, some lists create objects as you read
+them; if you try to estimate the size in memory of range(10**6) as
+the sum of all items' size, that operation will by itself create one
+million integer objects that never existed in the first place.
+"""
+
 def getsizeof(space, w_object, w_default=None):
-    """Not implemented on PyPy."""
     if w_default is None:
-        raise oefmt(space.w_TypeError,
-                    "sys.getsizeof() not implemented on PyPy")
+        raise oefmt(space.w_TypeError, getsizeof_missing)
     return w_default
 
+getsizeof.__doc__ = getsizeof_missing
+
 def intern(space, w_str):
     """``Intern'' the given string.  This enters the string in the (global)
 table of interned strings whose purpose is to speed up dictionary lookups.
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_ffi_backend.py
@@ -480,3 +480,7 @@
         assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
         p = ffi.new("int[]", [-123456789])
         assert ffi.unpack(p, 1) == [-123456789]
+
+    def test_negative_array_size(self):
+        ffi = FFI()
+        py.test.raises(ValueError, ffi.cast, "int[-5]", 0)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_ffi_obj.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_ffi_obj.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_ffi_obj.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_ffi_obj.py
@@ -503,3 +503,7 @@
     assert ffi.unpack(p+1, 7) == b"bc\x00def\x00"
     p = ffi.new("int[]", [-123456789])
     assert ffi.unpack(p, 1) == [-123456789]
+
+def test_negative_array_size():
+    ffi = _cffi1_backend.FFI()
+    py.test.raises(ffi.error, ffi.cast, "int[-5]", 0)
diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
--- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
+++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py
@@ -1981,3 +1981,29 @@
         static struct aaa f1(int x) { struct aaa s = {0}; s.a = x; return s; }
     """)
     assert lib.f1(52).a == 52
+
+def test_typedef_array_dotdotdot():
+    ffi = FFI()
+    ffi.cdef("""
+        typedef int foo_t[...], bar_t[...];
+        int gv[...];
+        typedef int mat_t[...][...];
+        typedef int vmat_t[][...];
+        """)
+    lib = verify(ffi, "test_typedef_array_dotdotdot", """
+        typedef int foo_t[50], bar_t[50];
+        int gv[23];
+        typedef int mat_t[6][7];
+        typedef int vmat_t[][8];
+    """)
+    assert ffi.sizeof("foo_t") == 50 * ffi.sizeof("int")
+    assert ffi.sizeof("bar_t") == 50 * ffi.sizeof("int")
+    assert len(ffi.new("foo_t")) == 50
+    assert len(ffi.new("bar_t")) == 50
+    assert ffi.sizeof(lib.gv) == 23 * ffi.sizeof("int")
+    assert ffi.sizeof("mat_t") == 6 * 7 * ffi.sizeof("int")
+    assert len(ffi.new("mat_t")) == 6
+    assert len(ffi.new("mat_t")[3]) == 7
+    py.test.raises(ffi.error, ffi.sizeof, "vmat_t")
+    p = ffi.new("vmat_t", 4)
+    assert ffi.sizeof(p[3]) == 8 * ffi.sizeof("int")
diff --git a/pypy/module/time/interp_time.py b/pypy/module/time/interp_time.py
--- a/pypy/module/time/interp_time.py
+++ b/pypy/module/time/interp_time.py
@@ -3,7 +3,7 @@
 from pypy.interpreter.error import OperationError, oefmt, strerror as _strerror, exception_from_saved_errno
 from pypy.interpreter.gateway import unwrap_spec
 from rpython.rtyper.lltypesystem import lltype
-from rpython.rlib.rarithmetic import intmask, r_ulonglong, r_longfloat
+from rpython.rlib.rarithmetic import intmask, r_ulonglong, r_longfloat, widen
 from rpython.rlib.rtime import (TIMEB, c_ftime,
                                 GETTIMEOFDAY_NO_TZ, TIMEVAL,
                                 HAVE_GETTIMEOFDAY, HAVE_FTIME)
@@ -290,12 +290,14 @@
                 if rffi.cast(rffi.LONG, errcode) == 0:
                     if w_info is not None:
                         _setinfo(space, w_info, "gettimeofday()", 1e-6, False, True)
-                    return space.wrap(timeval.c_tv_sec + timeval.c_tv_usec * 1e-6)
+                    return space.wrap(
+                        widen(timeval.c_tv_sec) +
+                        widen(timeval.c_tv_usec) * 1e-6)
         if HAVE_FTIME:
             with lltype.scoped_alloc(TIMEB) as t:
                 c_ftime(t)
-                result = (float(intmask(t.c_time)) +
-                          float(intmask(t.c_millitm)) * 0.001)
+                result = (widen(t.c_time) +
+                          widen(t.c_millitm) * 0.001)
                 if w_info is not None:
                     _setinfo(space, w_info, "ftime()", 1e-3,
                              False, True) 
@@ -535,7 +537,7 @@
         if not allowNone:
             raise oefmt(space.w_TypeError, "tuple expected")
         # default to the current local time
-        tt = rffi.r_time_t(int(pytime.time()))
+        tt = rffi.cast(rffi.TIME_T, pytime.time())
         t_ref = lltype.malloc(rffi.TIME_TP.TO, 1, flavor='raw')
         t_ref[0] = tt
         pbuf = c_localtime(t_ref)
@@ -737,7 +739,7 @@
 
 if HAS_CLOCK_GETTIME:
     def _timespec_to_seconds(timespec):
-        return int(timespec.c_tv_sec) + int(timespec.c_tv_nsec) * 1e-9
+        return widen(timespec.c_tv_sec) + widen(timespec.c_tv_nsec) * 1e-9
 
     @unwrap_spec(clk_id='c_int')
     def clock_gettime(space, clk_id):
@@ -751,8 +753,9 @@
     @unwrap_spec(clk_id='c_int', secs=float)
     def clock_settime(space, clk_id, secs):
         with lltype.scoped_alloc(TIMESPEC) as timespec:
-            frac = math.fmod(secs, 1.0)
-            rffi.setintfield(timespec, 'c_tv_sec', int(secs))
+            integer_secs = rffi.cast(TIMESPEC.c_tv_sec, secs)
+            frac = secs - widen(integer_secs)
+            rffi.setintfield(timespec, 'c_tv_sec', integer_secs)
             rffi.setintfield(timespec, 'c_tv_nsec', int(frac * 1e9))
             ret = c_clock_settime(clk_id, timespec)
             if ret != 0:
@@ -1051,16 +1054,15 @@
             return win_perf_counter(space, w_info=w_info)
         except ValueError:
             pass
-    value = _clock()
-    # Is this casting correct?
-    if intmask(value) == intmask(rffi.cast(rposix.CLOCK_T, -1)):
+    value = widen(_clock())
+    if value == widen(rffi.cast(rposix.CLOCK_T, -1)):
         raise oefmt(space.w_RuntimeError,
                     "the processor time used is not available or its value"
                     "cannot be represented")
     if w_info is not None:
         _setinfo(space, w_info,
                  "clock()", 1.0 / CLOCKS_PER_SEC, True, False)
-    return space.wrap((1.0 * value) / CLOCKS_PER_SEC)
+    return space.wrap(float(value) / CLOCKS_PER_SEC)
 
 
 def _setinfo(space, w_info, impl, res, mono, adj):
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -23,7 +23,6 @@
 from pypy.objspace.std.util import get_positive_index
 from pypy.objspace.std.formatting import mod_format, FORMAT_BYTEARRAY
 
-NON_HEX_MSG = "non-hexadecimal number found in fromhex() arg at position %d"
 
 
 class W_BytearrayObject(W_Root):
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -288,6 +288,10 @@
             return W_SmallLongObject.fromint(val)
         return W_LongObject.fromint(self, val)
 
+    @specialize.argtype(1)
+    def newlong_from_rarith_int(self, val): # val is an rarithmetic type 
+        return W_LongObject.fromrarith_int(val)
+
     def newlong_from_rbigint(self, val):
         return newlong(self, val)
 
diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py
--- a/pypy/objspace/std/test/test_longobject.py
+++ b/pypy/objspace/std/test/test_longobject.py
@@ -26,7 +26,6 @@
         space.raises_w(space.w_OverflowError, space.float_w, w_big)
 
     def test_rint_variants(self):
-        py.test.skip("XXX broken!")
         from rpython.rtyper.tool.rfficache import platform
         space = self.space
         for r in platform.numbertype_to_rclass.values():
@@ -37,8 +36,8 @@
             for x in values:
                 if not r.SIGNED:
                     x &= r.MASK
-                w_obj = space.wrap(r(x))
-                assert space.bigint_w(w_obj).eq(rbigint.fromint(x))
+                w_obj = space.newlong_from_rarith_int(r(x))
+                assert space.bigint_w(w_obj).eq(rbigint.fromlong(x))
 
 
 class AppTestLong:
diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py
--- a/pypy/objspace/std/test/test_typeobject.py
+++ b/pypy/objspace/std/test/test_typeobject.py
@@ -805,9 +805,7 @@
         class AA(object):
             __slots__ = ('a',)
         aa = AA()
-        # the following line works on CPython >= 2.6 but not on PyPy.
-        # but see below for more
-        raises(TypeError, "aa.__class__ = A")
+        aa.__class__ = A
         raises(TypeError, "aa.__class__ = object")
         class Z1(A):
             pass
@@ -869,9 +867,13 @@
             __slots__ = ['a', 'b']
         class Order2(object):
             __slots__ = ['b', 'a']
-        # the following line works on CPython >= 2.6 but not on PyPy.
-        # but see below for more
-        raises(TypeError, "Order1().__class__ = Order2")
+        Order1().__class__ = Order2
+
+        # like CPython, the order of slot names doesn't matter
+        x = Order1()
+        x.a, x.b = 1, 2
+        x.__class__ = Order2
+        assert (x.a, x.b) == (1, 2)
 
         class U1(object):
             __slots__ = ['a', 'b']
@@ -881,10 +883,11 @@
             __slots__ = ['a', 'b']
         class V2(V1):
             __slots__ = ['c', 'd', 'e']
-        # the following line does not work on CPython >= 2.6 either.
-        # that's just obscure.  Really really.  So we just ignore
-        # the whole issue until someone comes complaining.  Then we'll
-        # just kill slots altogether apart from maybe doing a few checks.
+        # the following line does not work on CPython either: we can't
+        # change a class if the old and new class have different layouts
+        # that look compatible but aren't, because they don't have the
+        # same base-layout class (even if these base classes are
+        # themselves compatible)...  obscure.
         raises(TypeError, "U2().__class__ = V2")
 
     def test_name(self):
diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py
--- a/pypy/objspace/std/typeobject.py
+++ b/pypy/objspace/std/typeobject.py
@@ -103,9 +103,10 @@
     """
     _immutable_ = True
 
-    def __init__(self, typedef, nslots, base_layout=None):
+    def __init__(self, typedef, nslots, newslotnames=[], base_layout=None):
         self.typedef = typedef
         self.nslots = nslots
+        self.newslotnames = newslotnames[:]    # make a fixed-size list
         self.base_layout = base_layout
 
     def issublayout(self, parent):
@@ -115,6 +116,12 @@
                 return False
         return True
 
+    def expand(self, hasdict, weakrefable):
+        """Turn this Layout into a tuple.  If two classes get equal
+        tuples, it means their instances have a fully compatible layout."""
+        return (self.typedef, self.newslotnames, self.base_layout,
+                hasdict, weakrefable)
+
 
 # possible values of compares_by_identity_status
 UNKNOWN = 0
@@ -287,8 +294,7 @@
 
     # compute a tuple that fully describes the instance layout
     def get_full_instance_layout(self):
-        layout = self.layout
-        return (layout, self.hasdict, self.weakrefable)
+        return self.layout.expand(self.hasdict, self.weakrefable)
 
     def compute_default_mro(self):
         return compute_C3_mro(self.space, self)
@@ -1022,11 +1028,15 @@
         w_self.weakrefable = w_self.weakrefable or w_base.weakrefable
     return hasoldstylebase
 
+
 def create_all_slots(w_self, hasoldstylebase, w_bestbase, force_new_layout):
+    from pypy.objspace.std.listobject import StringSort
+
     base_layout = w_bestbase.layout
     index_next_extra_slot = base_layout.nslots
     space = w_self.space
     dict_w = w_self.dict_w
+    newslotnames = []
     if '__slots__' not in dict_w:
         wantdict = True
         wantweakref = True
@@ -1052,9 +1062,22 @@
                                 "__weakref__ slot disallowed: we already got one")
                 wantweakref = True
             else:
-                index_next_extra_slot = create_slot(w_self, w_slot_name,
-                                                    slot_name,
-                                                    index_next_extra_slot)
+                newslotnames.append(slot_name)
+        # Sort the list of names collected so far
+        sorter = StringSort(newslotnames, len(newslotnames))
+        sorter.sort()
+        # Try to create all slots in order.  The creation of some of
+        # them might silently fail; then we delete the name from the
+        # list.  At the end, 'index_next_extra_slot' has been advanced
+        # by the final length of 'newslotnames'.
+        i = 0
+        while i < len(newslotnames):
+            if create_slot(w_self, newslotnames[i], index_next_extra_slot):
+                index_next_extra_slot += 1
+                i += 1
+            else:
+                del newslotnames[i]
+    #
     wantdict = wantdict or hasoldstylebase
     if wantdict:
         create_dict_slot(w_self)
@@ -1063,35 +1086,34 @@
     if '__del__' in dict_w:
         w_self.hasuserdel = True
     #
+    assert index_next_extra_slot == base_layout.nslots + len(newslotnames)
     if index_next_extra_slot == base_layout.nslots and not force_new_layout:
         return base_layout
     else:
         return Layout(base_layout.typedef, index_next_extra_slot,
-                      base_layout=base_layout)
+                      newslotnames, base_layout=base_layout)
 
-def create_slot(w_self, w_slot_name, slot_name, index_next_extra_slot):
+def create_slot(w_self, slot_name, index_next_extra_slot):
     space = w_self.space
     if not valid_slot_name(slot_name):
         raise oefmt(space.w_TypeError, "__slots__ must be identifiers")
     # create member
     slot_name = mangle(slot_name, w_self.name)
-    if slot_name in w_self.dict_w:
-        w_prev = w_self.dict_w[slot_name]
-        if isinstance(w_prev, Member) and w_prev.w_cls is w_self:
-            pass   # special case: duplicate __slots__ entry, ignored
-                   # (e.g. occurs in datetime.py, fwiw)
-        else:
-            raise oefmt(space.w_ValueError,
-                        "%R in __slots__ conflicts with class variable",
-                        w_slot_name)
-    else:
+    if slot_name not in w_self.dict_w:
         # Force interning of slot names.
         slot_name = space.str_w(space.new_interned_str(slot_name))
         # in cpython it is ignored less, but we probably don't care
         member = Member(index_next_extra_slot, slot_name, w_self)
-        index_next_extra_slot += 1
         w_self.dict_w[slot_name] = space.wrap(member)
-    return index_next_extra_slot
+        return True
+    else:
+        w_prev = w_self.dict_w[slot_name]
+        if isinstance(w_prev, Member) and w_prev.w_cls is w_self:
+            return False   # special case: duplicate __slots__ entry, ignored
+                           # (e.g. occurs in datetime.py, fwiw)
+        raise oefmt(space.w_ValueError,
+                    "'%8' in __slots__ conflicts with class variable",
+                    slot_name)
 
 def create_dict_slot(w_self):
     if not w_self.hasdict:
diff --git a/rpython/doc/jit/backend.rst b/rpython/doc/jit/backend.rst
new file mode 100644
--- /dev/null
+++ b/rpython/doc/jit/backend.rst
@@ -0,0 +1,263 @@
+=========================
+PyPy's assembler backends
+=========================
+
+Draft notes about the organization of assembler backends in the PyPy JIT, in 2016
+=================================================================================
+
+
+input: linear sequence of instructions, called a "trace".
+
+A trace is a sequence of instructions in SSA form.  Most instructions
+correspond to one or a few CPU-level instructions.  There are a few
+meta-instructions like `label` and debugging stuff.  All branching is
+done with guards, which are instructions that check that a condition is
+true and exit the trace if not.  A failing guard can have a new trace
+added to it later, called a "bridge".  A patched guard becomes a direct
+`Jcond` instruction going to the bridge, with no indirection, no
+register spilling, etc.
+
+A trace ends with either a `return` or a `jump to label`.  The target
+label is either inside the same trace, or in some older one.  For
+historical reasons we call a "loop" a trace that is not a bridge.  The
+machine code that we generate is organized as a forest of trees; the
+trunk of the tree is a "loop", and the branches are all bridges
+(branching off the trunk or off another branch).
+
+* every trunk or branch that ends in a `jump to label` can target a
+  label from a different tree, too.
+
+* the whole process of assembling a loop or a branch is basically
+  single-threaded, so no synchronization issue there (including to patch
+  older generated instructions).
+
+* the generated assembler has got a "frame" in %rbp, which is actually
+  not on the stack at all, but is a GC object (called a "jitframe").
+  Spilling goes there.
+
+* the guards are `Jcond` to a very small piece of generated code, which
+  is basically pushing a couple of constants on the stack and then
+  jumping to the general guard-recovery code.  That code will save the
+  registers into the jitframe and then exit the whole generated
+  function.  The caller of that generated function checks how it
+  finished: if it finished by hitting a guard, then the caller is
+  responsible for calling the "blackhole interpreter".  This is the part
+  of the front-end that recovers from failing guards and finishes
+  running the frame (including, possibly, by jumping again into
+  generated assembler).
+
+
+Details about the JITting process:
+
+* front-end and optimization pass
+
+* rewrite (includes gc related transformation as well as simplifactions)
+
+* assembler generation
+
+
+Front-end and optimization pass
+-------------------------------
+
+Not discussed here in detail.  This produces loops and bridges using an
+instruction set that is "high-level" in some sense: it contains
+intructions like "new"/"new_array", and
+"setfield"/"setarrayitem"/"setinteriorfield" which describe the action
+of storing a value in a precise field of the structure or array.  For
+example, the "setfield" action might require implicitly a GC write
+barrier.  This is the high-level trace that we send to the following
+step.
+
+
+Rewrite
+-------
+
+A mostly but not completely CPU-independent phase: lowers some
+instructions.  For example, the variants of "new" are lowered to
+"malloc" and a few "gc_store": it bumps the pointer of the GC and then
+sets a few fields explicitly in the newly allocated structure.  The
+"setfield" is replaced with a "cond_gc_wb_call" (conditional call to the
+write barrier) if needed, followed by a "gc_store".
+
+The "gc_store" instruction can be encoded in a single MOV assembler
+instruction, but is not as flexible as a MOV.  The address is always
+specified as "some GC pointer + an offset".  We don't have the notion of
+interior pointer for GC objects.
+
+A different instruction, "gc_store_indexed", offers additional operands,
+which can be mapped to a single MOV instruction using forms like
+`[rax+8*rcx+24]`.
+
+Some other complex instructions pass through to the backend, which must
+deal with them: for example, "card marking" in the GC.  (Writing an
+object pointer inside an array would require walking the whole array
+later to find "young" references. Instead of that, we flip a bit for
+every range of 128 entries.  This is a common GC optimization.)  Setting
+the card bit of a GC object requires a sequence of assembler
+instructions that depends too much on the target CPU to be expressed
+explicitly here (moreover, it contains a few branches, which are hard to
+express at this level).
+
+
+Assembly
+--------
+
+No fancy code generation technique, but greedy forward pass that tries
+to avoid some pitfalls
+
+
+Handling instructions
+~~~~~~~~~~~~~~~~~~~~~
+
+* One by one (forward direction).   Each instruction asks the register
+  allocator to ensure that some arguments are in registers (not in the
+  jitframe); asks for a register to put its result into; and asks for
+  additional scratch registers that will be freed at  the end of the
+  instruction.  There is a special case for boolean variables: they are
+  stored in the condition code flags instead of being materialized as a
+  0/1 value.  (They are materialized later, except in the common case
+  where they are only used by the next `guard_false` or `guard_true` and
+  then forgotten.)
+
+* Instruction arguments are loaded into a register on demand.  This
+  makes the backend quite easy to write, but leads do some bad
+  decisions.
+
+
+Linear scan register allocation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Although it's always a linear trace that we consider, we don't use
+advanced techniques for register allocation: we do forward, on-demand
+allocation as the backend produces the assembler.  When it asks for a
+register to put some value into, we give it any free register, without
+consideration for what will be done with it later.  We compute the
+longevity of all variables, but only use it when choosing which register
+to spill (we spill the variable with the longest longevity).
+
+This works to some extend because it is well integrated with the earlier
+optimization pass. Loops are unrolled once by the optimization pass to
+allow more powerful optimizations---the optimization pass itself is the
+place that benefits the most, but it also has benefits here in the
+assembly pass.  These are:
+
+* The first peeling initializes the register binding on the first use.
+
+* This leads to an already allocated register of the trace loop.
+
+* As well as allocated registers when exiting bridges
+
+[Try to better allocate registers to match the ABI (minor to non benefit
+in the current state)]
+
+
+More complex mappings
+~~~~~~~~~~~~~~~~~~~~~
+
+Some instructions generate more complex code.  These are either or both of:
+
+* complex instructions generating some local control flow, like
+  "cond_gc_wb_call" (for write barriers), "call_assembler" (a call
+  followed by a few checks).
+
+* instructions that invoke custom assembler helpers, like the slow-path
+  of write barriers or the slow-path of allocations.  These slow-paths
+  are typically generated too, so that we are not constrained by the
+  usual calling conventions.
+
+
+GC pointers
+~~~~~~~~~~~
+
+Around most CALL instructions, we need to record a description of where
+the GC pointers are (registers and stack frame).  This is needed in case
+the CALL invokes a garbage collection.  The GC pointers can move; the
+pointers in the registers and stack frame are updated by the GC.  That's
+a reason for why we don't have explicit interior pointers.
+
+GC pointers can appear as constants in the trace.  We are busy changing
+that to use a constant table and `MOV REG, (%RIP+offset)`.  The
+"constant" in the table is actually updated by the GC if the object
+move.
+
+
+Vectorization
+~~~~~~~~~~~~~
+
+Optimization developed to use SIMD instructions for trace loops. Primary
+idea was to use it as an optimization of micro numpy. It has several
+passes on the already optimized trace.
+
+Shortly explained: It builds dependencies for an unrolled trace loop,
+gathering pairs/packs of operations that could be executed in parallel
+and finally schedules the operations.
+
+What did it add to the code base:
+
+* Dependencies can be constructed
+
+* Code motion of guards to relax dependencies
+
+* Scheduler to reorder trace
+
+* Array bound check removal (especially for unrolled traces)
+
+What can it do:
+
+* Transform vector loops (element wise operations)
+
+* Accumulation (`reduce([...],operator,0)`). Requires Operation to be
+  associative and commutative
+
+* SSE 4.1 as "vector backend"
+
+
+We do not
+~~~~~~~~~
+
+* Keep tracing data around to reoptimize the trace tree. (Once a trace
+  is compiled, minimal data is kept.)  This is one reason (there are
+  others in the front-end) for the following result: JIT-compiling a
+  small loop with two common paths ends up as one "loop" and one bridge
+  assembled, and the bridge-following path is slightly less efficient.
+  This is notably because this bridge is assembled with two constraints:
+  the input registers are fixed (from the guard), and the output
+  registers are fixed (from the jump target); usually these two sets of
+  fixed registers are different, and copying around is needed.
+
+* We don't join trace tails: we only assemble *trees*.
+
+* We don't do any reordering (neither of trace instructions nor of
+  individual assembler instructions)
+
+* We don't do any cross-instruction optimization that makes sense only
+  for the backend and can't easily be expressed at a higher level.  I'm
+  sure there are tons of examples of that, but e.g. loading a large
+  constant in a register that will survive for several instructions;
+  moving out of loops *parts* of some instruction like the address
+  calculation; etc. etc.
+
+* Other optimization opportunities I can think about: look at the
+  function prologue/epilogue; look at the overhead (small but not zero)
+  at the start of a bridge.  Also check if the way guards are
+  implemented makes sense.  Also, we generate large-ish sequences of
+  assembler instructions with tons of `Jcond` that are almost never
+  followed; any optimization opportunity there?  (They all go forward,
+  if it changes anything.)  In theory we could also replace some of
+  these with a signal handler on segfault (e.g. `guard_nonnull_class`).
+
+
+a GCC or LLVM backend?
+~~~~~~~~~~~~~~~~~~~~~~
+
+At least for comparison we'd like a JIT backend that emits its code
+using GCC or LLVM (irrespective of the time it would take).  But it's
+hard to map reasonably well the guards to the C language or to LLVM IR.
+The problems are: (1) we have many guards, we would like to avoid having
+many paths that each do a full
+saving-all-local-variables-that-are-still-alive; (2) it's hard to patch
+a guard when a bridge is compiled from it; (3) instructions like a CALL
+need to expose the local variables that are GC pointers; CALL_MAY_FORCE
+need to expose *all* local variables for optional off-line
+reconstruction of the interpreter state.
+
diff --git a/rpython/doc/jit/index.rst b/rpython/doc/jit/index.rst
--- a/rpython/doc/jit/index.rst
+++ b/rpython/doc/jit/index.rst
@@ -26,6 +26,7 @@
    optimizer
    virtualizable
    vectorization
+   backend
 
 - :doc:`Overview <overview>`: motivating our approach
 
@@ -34,5 +35,8 @@
 - :doc:`Optimizer <optimizer>`: the step between tracing and writing
   machine code
 
-- :doc:`Virtulizable <virtualizable>` how virtualizables work and what they are
-  (in other words how to make frames more efficient).
+- :doc:`Virtualizable <virtualizable>`: how virtualizables work and what
+  they are (in other words how to make frames more efficient).
+
+- :doc:`Assembler backend <backend>`: draft notes about the organization
+  of the assembler backends
diff --git a/rpython/jit/backend/test/test_ll_random.py b/rpython/jit/backend/test/test_ll_random.py
--- a/rpython/jit/backend/test/test_ll_random.py
+++ b/rpython/jit/backend/test/test_ll_random.py
@@ -710,6 +710,12 @@
 
 # 6. a conditional call (for now always with no exception raised)
 class CondCallOperation(BaseCallOperation):
+
+    def filter(self, builder):
+        if not builder.cpu.supports_cond_call_value and \
+           self.opnum == rop.COND_CALL_VALUE_I:
+            raise CannotProduceOperation
+
     def produce_into(self, builder, r):
         fail_subset = builder.subset_of_intvars(r)
         if self.opnum == rop.COND_CALL:
diff --git a/rpython/jit/backend/test/zll_stress.py b/rpython/jit/backend/test/zll_stress.py
--- a/rpython/jit/backend/test/zll_stress.py
+++ b/rpython/jit/backend/test/zll_stress.py
@@ -1,6 +1,7 @@
 from rpython.jit.backend.test.test_random import check_random_function, Random
 from rpython.jit.backend.test.test_ll_random import LLtypeOperationBuilder
 from rpython.jit.backend.detect_cpu import getcpuclass
+from rpython.jit.metainterp.resoperation import rop
 import platform
 
 CPU = getcpuclass()
diff --git a/rpython/jit/codewriter/jtransform.py b/rpython/jit/codewriter/jtransform.py
--- a/rpython/jit/codewriter/jtransform.py
+++ b/rpython/jit/codewriter/jtransform.py
@@ -200,8 +200,12 @@
             or v.concretetype != lltype.Bool):
             return False
         for op in block.operations[::-1]:
-            if v in op.args:
-                return False   # variable is also used in cur block
+            # check if variable is used in block
+            for arg in op.args:
+                if arg == v:
+                    return False
+                if isinstance(arg, ListOfKind) and v in arg.content:
+                    return False
             if v is op.result:
                 if op.opname not in ('int_lt', 'int_le', 'int_eq', 'int_ne',
                                      'int_gt', 'int_ge',
diff --git a/rpython/jit/codewriter/test/test_jtransform.py b/rpython/jit/codewriter/test/test_jtransform.py
--- a/rpython/jit/codewriter/test/test_jtransform.py
+++ b/rpython/jit/codewriter/test/test_jtransform.py
@@ -243,6 +243,20 @@
         assert block.exitswitch == (opname, v1, '-live-before')
         assert block.exits == exits
 
+def test_optimize_goto_if_not__argument_to_call():
+    for opname in ['ptr_iszero', 'ptr_nonzero']:
+        v1 = Variable()
+        v3 = Variable(); v3.concretetype = lltype.Bool
+        v4 = Variable()
+        block = Block([v1])
+        callop = SpaceOperation('residual_call_r_i',
+                ["fake", ListOfKind('int', [v3])], v4)
+        block.operations = [SpaceOperation(opname, [v1], v3), callop]
+        block.exitswitch = v3
+        block.exits = exits = [FakeLink(False), FakeLink(True)]
+        res = Transformer().optimize_goto_if_not(block)
+        assert not res
+
 def test_symmetric():
     ops = {'int_add': 'int_add',
            'int_or': 'int_or',
diff --git a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/test/test_virtualstate.py
@@ -90,18 +90,23 @@
         assert vs.make_inputargs(args, optimizer) == []
 
     def test_make_inputargs_2(self):
-        # Ensure that make_inputargs properly errors with VirtualStatesCantMatch
-        # when the type information for a virtual field conflicts. In practice the
-        # expected and given field always share a common subclass.
-        # This check is needed as not all paths to make_inputargs in unroll.py
-        # are guarded by a call to generate_guards.
+        # Ensure that make_inputargs does not error when the lengths of the fields
+        # for the runtime box does not match what the virtual state expected.
+        # This can occur in unroll.py, as not all paths to make_inputargs are
+        # guareded with a generalization_of check. The property is validated
+        # subsequently in all cases, so we just need to ensure that this case does
+        # not cause segfaults.
         optimizer = FakeOptimizer(self.cpu)
         classbox1 = self.cpu.ts.cls_of_box(InputArgRef(self.nodeaddr))
-        innervalue1 = info.InstancePtrInfo(known_class=classbox1, is_virtual=True, descr=self.valuedescr.get_parent_descr())
+        innervalue1 = info.InstancePtrInfo(
+                known_class=classbox1, is_virtual=True,
+                descr=self.valuedescr.get_parent_descr())
         for field in self.valuedescr.get_parent_descr().get_all_fielddescrs():
             innervalue1.setfield(field, None, ConstInt(42))
         classbox2 = self.cpu.ts.cls_of_box(InputArgRef(self.myptr3))
-        innervalue2 = info.InstancePtrInfo(known_class=classbox2, is_virtual=True, descr=self.valuedescr3.get_parent_descr())
+        innervalue2 = info.InstancePtrInfo(
+                known_class=classbox2, is_virtual=True,
+                descr=self.valuedescr3.get_parent_descr())
         for field in self.valuedescr3.get_parent_descr().get_all_fielddescrs():
             innervalue2.setfield(field, None, ConstInt(42))
 
@@ -111,10 +116,14 @@
         nodebox2.set_forwarded(innervalue2)
 
         constr = VirtualStateConstructor(optimizer)
-        vs = constr.get_virtual_state([nodebox1])
+        vs1 = constr.get_virtual_state([nodebox1])
+        constr = VirtualStateConstructor(optimizer)
+        vs2 = constr.get_virtual_state([nodebox2])
 
-        with py.test.raises(VirtualStatesCantMatch):
-            args = vs.make_inputargs([nodebox2], optimizer, force_boxes=True)
+        # This should succeed with no exceptions
+        vs1.make_inputargs([nodebox2], optimizer, force_boxes=False)
+        assert not vs1.generalization_of(vs2, optimizer)
+        assert not vs2.generalization_of(vs1, optimizer)
 
     def test_position_generalization(self):
         def postest(info1, info2):
diff --git a/rpython/jit/metainterp/optimizeopt/unroll.py b/rpython/jit/metainterp/optimizeopt/unroll.py
--- a/rpython/jit/metainterp/optimizeopt/unroll.py
+++ b/rpython/jit/metainterp/optimizeopt/unroll.py
@@ -167,7 +167,8 @@
                [self.get_box_replacement(x) for x in end_jump.getarglist()],
                self.optimizer, force_boxes=True)
             for arg in args:
-                self.optimizer.force_box(arg)
+                if arg is not None:
+                    self.optimizer.force_box(arg)
         except VirtualStatesCantMatch:
             raise InvalidLoop("Virtual states did not match "
                               "after picking the virtual state, when forcing"
diff --git a/rpython/jit/metainterp/optimizeopt/virtualstate.py b/rpython/jit/metainterp/optimizeopt/virtualstate.py
--- a/rpython/jit/metainterp/optimizeopt/virtualstate.py
+++ b/rpython/jit/metainterp/optimizeopt/virtualstate.py
@@ -177,14 +177,6 @@
     def _generalization_of_structpart(self, other):
         raise NotImplementedError
 
-    @staticmethod
-    def descr_issubclass(descr1, descr2, optimizer):
-        if not descr1.is_object() or not descr2.is_object():
-            return True
-        vtable1 = descr1.get_vtable()
-        vtable2 = descr2.get_vtable()
-        return optimizer._check_subclass(vtable1, vtable2)
-
     def enum_forced_boxes(self, boxes, box, optimizer, force_boxes=False):
         box = optimizer.get_box_replacement(box)
         info = optimizer.getptrinfo(box)
@@ -193,13 +185,12 @@
         else:
             assert isinstance(info, AbstractStructPtrInfo)