From pypy.commits at gmail.com  Wed Nov  1 05:52:52 2017
From: pypy.commits at gmail.com (antocuni)
Date: Wed, 01 Nov 2017 02:52:52 -0700 (PDT)
Subject: [pypy-commit] pypy keep-debug-symbols: close branch to be merged
Message-ID: <59f99974.4e9d1c0a.3489.17c7@mx.google.com>

Author: Antonio Cuni
Branch: keep-debug-symbols
Changeset: r92894:f29f0f12ffa8
Date: 2017-11-01 10:45 +0100
http://bitbucket.org/pypy/pypy/changeset/f29f0f12ffa8/

Log: close branch to be merged

From pypy.commits at gmail.com  Wed Nov  1 05:52:54 2017
From: pypy.commits at gmail.com (antocuni)
Date: Wed, 01 Nov 2017 02:52:54 -0700 (PDT)
Subject: [pypy-commit] pypy default: merge the branch keep-debug-symbols:
Message-ID: <59f99976.87271c0a.9cae7.0b5d@mx.google.com>

Author: Antonio Cuni
Branch:
Changeset: r92895:77fff565d382
Date: 2017-11-01 10:51 +0100
http://bitbucket.org/pypy/pypy/changeset/77fff565d382/

Log: merge the branch keep-debug-symbols:

     - symbols are stripped from the executable and placed in a separate
       file, libpypy-c.so.debug

     - we add a .gnu_debuglink section to libpypy-c.so which points to the
       .debug file, so that it works transparently in gdb

     - this produces immensely more useful stack traces inside gdb;
       moreover, it is also potentially usable by vmprof

     - the .debug file is ~18MB. The tarball size goes from 22MB to 25MB.
       I claim that disk space and bandwidth are cheap, so we should just
       not care, especially for nightly builds

     - if we REALLY care about the tarball size of official releases, we
       can simply remove the .debug file from the tarball

diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -21,6 +21,7 @@
 import fnmatch
 import subprocess
 import glob
+from pypy.tool.release.smartstrip import smartstrip
 
 if sys.version_info < (2,6):
     py.test.skip("requires 2.6 so far")
@@ -212,15 +213,8 @@
     old_dir = os.getcwd()
     try:
         os.chdir(str(builddir))
-        if not options.nostrip:
-            for source, target in binaries:
-                if sys.platform == 'win32':
-                    pass
-                elif sys.platform == 'darwin':
-                    # 'strip' fun: see issue #587 for why -x
-                    os.system("strip -x " + str(bindir.join(target)))    # ignore errors
-                else:
-                    os.system("strip " + str(bindir.join(target)))    # ignore errors
+        for source, target in binaries:
+            smartstrip(bindir.join(target), keep_debug=options.keep_debug)
         #
         if USE_ZIPFILE_MODULE:
             import zipfile
@@ -281,8 +275,8 @@
                         help='do not build and package the %r cffi module' % (key,))
     parser.add_argument('--without-cffi', dest='no_cffi', action='store_true',
                         help='skip building *all* the cffi modules listed above')
-    parser.add_argument('--nostrip', dest='nostrip', action='store_true',
-                        help='do not strip the exe, making it ~10MB larger')
+    parser.add_argument('--no-keep-debug', dest='keep_debug',
                         action='store_false', help='do not keep debug symbols')
     parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe,
                         help='target executable name, defaults to "pypy"')
     parser.add_argument('--archive-name', dest='name', type=str, default='',
@@ -295,8 +289,8 @@
                         help='use as pypy exe instead of pypy/goal/pypy-c')
     options = parser.parse_args(args)
 
-    if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"):
-        options.nostrip = True
+    if os.environ.has_key("PYPY_PACKAGE_NOKEEPDEBUG"):
+        options.keep_debug = False
     if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"):
         options.no_tk = True
     if not options.builddir:
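For reference, the mechanism described in the log message above boils down
to two objcopy invocations plus a strip. The sketch below shows the same
idea in plain Python; it is an illustration only, assuming a Linux host
with GNU binutils, and "libpypy-c.so" is a placeholder path rather than
anything taken from the patch. The new smartstrip.py further down performs
the equivalent steps via os.system:

    # minimal sketch of the split-debug flow (assumes GNU binutils on Linux;
    # "libpypy-c.so" is a placeholder path, not part of the patch itself)
    import subprocess

    def split_debug(libpath):
        debugpath = libpath + '.debug'
        # copy only the debug info into a separate .debug file
        subprocess.check_call(['objcopy', '--only-keep-debug', libpath, debugpath])
        # drop the symbols from the library itself
        subprocess.check_call(['strip', libpath])
        # record the .debug file in a .gnu_debuglink section, so gdb finds it
        subprocess.check_call(['objcopy', '--add-gnu-debuglink=%s' % debugpath, libpath])

    split_debug('libpypy-c.so')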
diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/release/smartstrip.py
@@ -0,0 +1,32 @@
+"""
+Strip symbols from an executable, but keep them in a .debug file
+"""
+
+import sys
+import os
+import py
+
+def _strip(exe):
+    if sys.platform == 'win32':
+        pass
+    elif sys.platform == 'darwin':
+        # 'strip' fun: see issue #587 for why -x
+        os.system("strip -x " + str(exe))    # ignore errors
+    else:
+        os.system("strip " + str(exe))    # ignore errors
+
+def _extract_debug_symbols(exe, debug):
+    if sys.platform == 'linux2':
+        os.system("objcopy --only-keep-debug %s %s" % (exe, debug))
+        os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe))
+
+def smartstrip(exe, keep_debug=True):
+    exe = py.path.local(exe)
+    debug = py.path.local(str(exe) + '.debug')
+    if keep_debug:
+        _extract_debug_symbols(exe, debug)
+    _strip(exe)
+
+
+if __name__ == '__main__':
+    smartstrip(sys.argv[1])
diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/release/test/test_smartstrip.py
@@ -0,0 +1,50 @@
+import pytest
+import sys
+import os
+from commands import getoutput
+from pypy.tool.release.smartstrip import smartstrip
+
+@pytest.fixture
+def exe(tmpdir):
+    src = tmpdir.join("myprog.c")
+    src.write("""
+    int foo(int a, int b) {
+        return a+b;
+    }
+    int main(void) { }
+    """)
+    exe = tmpdir.join("myprog")
+    ret = os.system("gcc -o %s %s" % (exe, src))
+    assert ret == 0
+    return exe
+
+def info_symbol(exe, symbol):
+    out = getoutput("gdb %s -ex 'info symbol %s' -ex 'quit'" % (exe, symbol))
+    lines = out.splitlines()
+    return lines[-1]
+
+@pytest.mark.skipif(sys.platform == 'win32',
+                    reason='strip not supported on windows')
+class TestSmarStrip(object):
+
+    def test_info_symbol(self, exe):
+        info = info_symbol(exe, "foo")
+        assert info == "foo in section .text"
+
+    def test_strip(self, exe):
+        smartstrip(exe, keep_debug=False)
+        info = info_symbol(exe, "foo")
+        assert info.startswith("No symbol table is loaded")
+
+    @pytest.mark.skipif(sys.platform != 'linux2',
+                        reason='keep_debug not supported')
+    def test_keep_debug(self, exe, tmpdir):
+        smartstrip(exe, keep_debug=True)
+        debug = tmpdir.join("myprog.debug")
+        assert debug.check(file=True)
+        info = info_symbol(exe, "foo")
+        assert info == "foo in section .text of %s" % exe
+        #
+        debug.remove()
+        info = info_symbol(exe, "foo")
+        assert info.startswith("No symbol table is loaded")

From pypy.commits at gmail.com  Wed Nov  1 15:30:42 2017
From: pypy.commits at gmail.com (mattip)
Date: Wed, 01 Nov 2017 12:30:42 -0700 (PDT)
Subject: [pypy-commit] pypy bsd-patches: patches from issue 2694, implement ctypes.CDLL(... handle=n)
Message-ID: <59fa20e2.95b6df0a.e17b6.799e@mx.google.com>

Author: Matti Picus
Branch: bsd-patches
Changeset: r92896:86e686981d73
Date: 2017-11-01 21:13 +0200
http://bitbucket.org/pypy/pypy/changeset/86e686981d73/

Log: patches from issue 2694, implement ctypes.CDLL(...
handle=n) diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: From pypy.commits at gmail.com Wed Nov 1 15:30:44 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:44 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: 
patch from issue2695, fail early in ll2ctypes RTLD code Message-ID: <59fa20e4.48d31c0a.39652.cc49@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92897:16db4e36eac0 Date: 2017-11-01 21:14 +0200 http://bitbucket.org/pypy/pypy/changeset/16db4e36eac0/ Log: patch from issue2695, fail early in ll2ctypes RTLD code diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Wed Nov 1 15:30:46 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:46 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: patches from issue #2696, fix various tests on FreeBSD Message-ID: <59fa20e6.4e9d1c0a.3489.5242@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92898:e9096f3b8ca5 Date: 2017-11-01 21:21 +0200 http://bitbucket.org/pypy/pypy/changeset/e9096f3b8ca5/ Log: patches from issue #2696, fix various tests on FreeBSD diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -107,6 +108,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + 
"'freebsd' in sys.platform") def test_raw_input(): import os import pty From pypy.commits at gmail.com Wed Nov 1 15:30:48 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:48 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: patches from issue #2697 fix compilation on FreeBSD Message-ID: <59fa20e8.46901c0a.c1e8e.7e26@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92899:0e6fa4b45bfa Date: 2017-11-01 21:29 +0200 http://bitbucket.org/pypy/pypy/changeset/0e6fa4b45bfa/ Log: patches from issue #2697 fix compilation on FreeBSD diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -244,13 +244,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( From pypy.commits at gmail.com Thu Nov 2 04:07:35 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:35 -0700 (PDT) Subject: [pypy-commit] pypy default: hack the cffi hack to import setuptools on a fresh pypy, gaaaa! Message-ID: <59fad247.9b88df0a.5accb.a6bf@mx.google.com> Author: Matti Picus Branch: Changeset: r92900:a2348b760a36 Date: 2017-11-02 09:52 +0200 http://bitbucket.org/pypy/pypy/changeset/a2348b760a36/ Log: hack the cffi hack to import setuptools on a fresh pypy, gaaaa! diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -22,6 +22,12 @@ shutil.rmtree(str(basedir.join('lib_pypy', '__pycache__')), ignore_errors=True) + # be sure pip, setuptools are installed in a fresh pypy + # allows proper functioning of cffi on win32 with newer vc compilers + # XXX move this to a build slave step? 
+ status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import setuptools']) + if status != 0: + status, stdout, stderr = run_subprocess(str(pypy_c), ['-m', 'ensurepip']) failures = [] for key, module in sorted(cffi_build_scripts.items()): if module is None or getattr(options, 'no_' + key, False): From pypy.commits at gmail.com Thu Nov 2 04:07:37 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:37 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: merge default into branch Message-ID: <59fad249.cc091c0a.9de42.e9fc@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92901:9112cc43a6cc Date: 2017-11-02 09:53 +0200 http://bitbucket.org/pypy/pypy/changeset/9112cc43a6cc/ Log: merge default into branch diff too long, truncating to 2000 out of 3454 lines diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from 
pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. -def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import 
INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void 
converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, 
python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's 
nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + 
raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis 
memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
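(For illustration only: a self-contained sketch of the scope-splitting logic added above. It follows the same algorithm as the new scope_splitter, under a hypothetical name, and the input string is just an example, not taken from the patch.)

    def split_scopes(name):
        # yield the '::'-separated pieces of a C++ name, ignoring any '::'
        # that occurs inside template arguments
        depth, scope = 0, ""
        for c in name:
            if c == ':' and not depth:
                if scope:
                    yield scope
                    scope = ""
                continue
            elif c == '<':
                depth += 1
            elif c == '>':
                depth -= 1
            scope += c
        yield scope

    assert list(split_scopes("std::vector<std::pair<int,double> >::iterator")) == \
           ["std", "vector<std::pair<int,double> >", "iterator"]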
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, - "__new__" : make_new(class_name), + "__dispatch__" : dispatch, } # insert (static) methods into the class dictionary - for name in decl.get_method_names(): - cppol = decl.get_overload(name) + for m_name in decl.get_method_names(): + cppol = decl.get_overload(m_name) if cppol.is_static(): - d_class[name] = make_static_function(name, cppol) + d_class[m_name] = make_static_function(m_name, cppol) else: - d_class[name] = make_method(name, cppol) + d_class[m_name] = make_method(m_name, cppol) # add all data members to the dictionary of the class to be created, and # static ones also to the metaclass (needed for property setters) - for name in decl.get_datamember_names(): - cppdm = decl.get_datamember(name) - d_class[name] = cppdm + for d_name in decl.get_datamember_names(): + cppdm = decl.get_datamember(d_name) + d_class[d_name] = cppdm if cppdm.is_static(): - d_meta[name] = cppdm + d_meta[d_name] = cppdm # create a metaclass to allow properties (for static data write access) metabases = [type(base) for base in bases] - metacpp = type(CPPMetaScope)(class_name+'_meta', _drop_cycles(metabases), d_meta) + metacpp = type(CPPMetaScope)(cl_name+'_meta', _drop_cycles(metabases), d_meta) # create the python-side C++ class - pycls = metacpp(class_name, _drop_cycles(bases), d_class) + pycls = metacpp(cl_name, _drop_cycles(bases), d_class) # store the class on its outer scope - setattr(scope, final_class_name, pycls) + setattr(scope, cl_name, pycls) # the call to register will add back-end specific pythonizations and thus # needs to run first, so that the generic 
pythonizations can use them @@ -192,32 +233,32 @@ return CPPTemplate(template_name, scope) -def get_pycppitem(scope, name): +def get_scoped_pycppitem(scope, name): import _cppyy - # resolve typedefs/aliases - full_name = (scope == gbl) and name or (scope.__name__+'::'+name) - true_name = _cppyy._resolve_name(full_name) - if true_name != full_name: - return get_pycppclass(true_name) + # resolve typedefs/aliases: these may cross namespaces, in which case + # the lookup must trigger the creation of all necessary scopes + scoped_name = (scope == gbl) and name or (scope.__cppname__+'::'+name) + final_scoped_name = _cppyy._resolve_name(scoped_name) + if final_scoped_name != scoped_name: + pycppitem = get_pycppitem(final_scoped_name) + # also store on the requested scope (effectively a typedef or pointer copy) + setattr(scope, name, pycppitem) + return pycppitem pycppitem = None - # classes - cppitem = _cppyy._scope_byname(true_name) + # scopes (classes and namespaces) + cppitem = _cppyy._scope_byname(final_scoped_name) if cppitem: - name = true_name - if scope != gbl: - name = true_name[len(scope.__cppname__)+2:] if cppitem.is_namespace(): pycppitem = make_cppnamespace(scope, name, cppitem) - setattr(scope, name, pycppitem) else: - pycppitem = make_cppclass(scope, name, true_name, cppitem) + pycppitem = make_cppclass(scope, name, cppitem) # templates if not cppitem: - cppitem = _cppyy._template_byname(true_name) + cppitem = _cppyy._is_template(final_scoped_name) if cppitem: pycppitem = make_cpptemplatetype(scope, name) setattr(scope, name, pycppitem) @@ -249,29 +290,6 @@ raise AttributeError("'%s' has no attribute '%s'" % (str(scope), name)) -def scope_splitter(name): - is_open_template, scope = 0, "" - for c in name: - if c == ':' and not is_open_template: - if scope: - yield scope - scope = "" - continue - elif c == '<': - is_open_template += 1 - elif c == '>': - is_open_template -= 1 - scope += c - yield scope - -def get_pycppclass(name): - # break up the name, to walk the scopes and get the class recursively - scope = gbl - for part in scope_splitter(name): - scope = getattr(scope, part) - return scope - - # pythonization by decoration (move to their own file?) 
def python_style_getitem(self, idx): # python-style indexing: check for size and allow indexing from the back @@ -346,8 +364,8 @@ # also the fallback on the indexed __getitem__, but that is slower) if not 'vector' in pyclass.__name__[:11] and \ ('begin' in pyclass.__dict__ and 'end' in pyclass.__dict__): - if _cppyy._scope_byname(pyclass.__name__+'::iterator') or \ - _cppyy._scope_byname(pyclass.__name__+'::const_iterator'): + if _cppyy._scope_byname(pyclass.__cppname__+'::iterator') or \ + _cppyy._scope_byname(pyclass.__cppname__+'::const_iterator'): def __iter__(self): i = self.begin() while i != self.end(): @@ -416,17 +434,21 @@ # pre-create std to allow direct importing gbl.std = make_cppnamespace(gbl, 'std', _cppyy._scope_byname('std')) + # add move cast + gbl.std.move = _cppyy.move + # install a type for enums to refer to # TODO: this is correct for C++98, not for C++11 and in general there will # be the same issue for all typedef'd builtin types setattr(gbl, 'internal_enum_type_t', int) - # install nullptr as a unique reference - setattr(gbl, 'nullptr', _cppyy._get_nullptr()) - # install for user access _cppyy.gbl = gbl + # install nullptr as a unique reference + _cppyy.nullptr = _cppyy._get_nullptr() + + # user-defined pythonizations interface _pythonizations = {} def add_pythonization(class_name, callback): diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -955,7 +955,13 @@ return cppstring_to_cstring(""); } -char* cppyy_method_signature(cppyy_scope_t /* handle */, cppyy_index_t /* method_index */) { +char* cppyy_method_signature( + cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { + return cppstring_to_cstring(""); +} + +char* cppyy_method_prototype( + cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { return cppstring_to_cstring(""); } diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -1,12 +1,14 @@ dicts = advancedcppDict.so \ advancedcpp2Dict.so \ + cpp11featuresDict.so \ crossingDict.so \ datatypesDict.so \ example01Dict.so \ fragileDict.so \ operatorsDict.so \ overloadsDict.so \ - stltypesDict.so + stltypesDict.so \ + templatesDict.so all : $(dicts) diff --git a/pypy/module/_cppyy/test/advancedcpp.cxx b/pypy/module/_cppyy/test/advancedcpp.cxx --- a/pypy/module/_cppyy/test/advancedcpp.cxx +++ b/pypy/module/_cppyy/test/advancedcpp.cxx @@ -106,17 +106,6 @@ } -// more template testing -long my_templated_method_class::get_size() { return -1; } - -long my_templated_method_class::get_char_size() { return (long)sizeof(char); } -long my_templated_method_class::get_int_size() { return (long)sizeof(int); } -long my_templated_method_class::get_long_size() { return (long)sizeof(long); } -long my_templated_method_class::get_float_size() { return (long)sizeof(float); } -long my_templated_method_class::get_double_size() { return (long)sizeof(double); } -long my_templated_method_class::get_self_size() { return (long)sizeof(my_templated_method_class); } - - // overload order testing int overload_one_way::gime() const { return 1; } std::string overload_one_way::gime() { return "aap"; } diff --git a/pypy/module/_cppyy/test/advancedcpp.h b/pypy/module/_cppyy/test/advancedcpp.h --- a/pypy/module/_cppyy/test/advancedcpp.h +++ b/pypy/module/_cppyy/test/advancedcpp.h 
@@ -246,8 +246,6 @@ int m_i; }; -template class std::vector; - //=========================================================================== class some_convertible { // for math conversions testing @@ -275,6 +273,7 @@ extern double my_global_double; // a couple of globals for access testing extern double my_global_array[500]; extern double* my_global_ptr; +static const char my_global_string[] = "aap " " noot " " mies"; //=========================================================================== class some_class_with_data { // for life-line and identity testing @@ -387,37 +386,6 @@ template char my_templated_function(char); template double my_templated_function(double); -class my_templated_method_class { -public: - long get_size(); // to get around bug in genreflex - template long get_size(); - - long get_char_size(); - long get_int_size(); - long get_long_size(); - long get_float_size(); - long get_double_size(); - - long get_self_size(); - -private: - double m_data[3]; -}; - -template -inline long my_templated_method_class::get_size() { - return sizeof(B); -} - -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); - -typedef my_templated_method_class my_typedef_t; -template long my_templated_method_class::get_size(); - //=========================================================================== class overload_one_way { // overload order testing diff --git a/pypy/module/_cppyy/test/advancedcpp.xml b/pypy/module/_cppyy/test/advancedcpp.xml --- a/pypy/module/_cppyy/test/advancedcpp.xml +++ b/pypy/module/_cppyy/test/advancedcpp.xml @@ -53,8 +53,6 @@ - - diff --git a/pypy/module/_cppyy/test/cpp11features.cxx b/pypy/module/_cppyy/test/cpp11features.cxx new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.cxx @@ -0,0 +1,18 @@ +#if __cplusplus >= 201103L + +#include "cpp11features.h" + + +// for std::shared_ptr<> testing +int TestSharedPtr::s_counter = 0; + +std::shared_ptr create_shared_ptr_instance() { + return std::shared_ptr(new TestSharedPtr); +} + + +// for move ctors etc. +int TestMoving1::s_move_counter = 0; +int TestMoving2::s_move_counter = 0; + +#endif // c++11 and later diff --git a/pypy/module/_cppyy/test/cpp11features.h b/pypy/module/_cppyy/test/cpp11features.h new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.h @@ -0,0 +1,45 @@ +#if __cplusplus >= 201103L + +#include + + +//=========================================================================== +class TestSharedPtr { // for std::shared_ptr<> testing +public: + static int s_counter; + +public: + TestSharedPtr() { ++s_counter; } + TestSharedPtr(const TestSharedPtr&) { ++s_counter; } + ~TestSharedPtr() { --s_counter; } +}; + +std::shared_ptr create_shared_ptr_instance(); + + +//=========================================================================== +class TestMoving1 { // for move ctors etc. 
+public: + static int s_move_counter; + +public: + TestMoving1() {} + TestMoving1(TestMoving1&&) { ++s_move_counter; } + TestMoving1(const TestMoving1&) {} + TestMoving1& operator=(TestMoving1&&) { ++s_move_counter; return *this; } + TestMoving1& operator=(TestMoving1&) { return *this; } +}; + +class TestMoving2 { // note opposite method order from TestMoving1 +public: + static int s_move_counter; + +public: + TestMoving2() {} + TestMoving2(const TestMoving2&) {} + TestMoving2(TestMoving2&& other) { ++s_move_counter; } + TestMoving2& operator=(TestMoving2&) { return *this; } + TestMoving2& operator=(TestMoving2&&) { ++s_move_counter; return *this; } +}; + +#endif // c++11 and later diff --git a/pypy/module/_cppyy/test/cpp11features.xml b/pypy/module/_cppyy/test/cpp11features.xml new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/pypy/module/_cppyy/test/fragile.h b/pypy/module/_cppyy/test/fragile.h --- a/pypy/module/_cppyy/test/fragile.h +++ b/pypy/module/_cppyy/test/fragile.h @@ -30,9 +30,11 @@ void overload(int, no_such_class* p = 0) {} }; +static const int dummy_location = 0xdead; + class E { public: - E() : m_pp_no_such(0), m_pp_a(0) {} + E() : m_pp_no_such((no_such_class**)&dummy_location), m_pp_a(0) {} virtual int check() { return (int)'E'; } void overload(no_such_class**) {} diff --git a/pypy/module/_cppyy/test/templates.cxx b/pypy/module/_cppyy/test/templates.cxx new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.cxx @@ -0,0 +1,12 @@ +#include "templates.h" + + +// template methods +long MyTemplatedMethodClass::get_size() { return -1; } + +long MyTemplatedMethodClass::get_char_size() { return (long)sizeof(char); } +long MyTemplatedMethodClass::get_int_size() { return (long)sizeof(int); } +long MyTemplatedMethodClass::get_long_size() { return (long)42; /* "lying" */ } +long MyTemplatedMethodClass::get_float_size() { return (long)sizeof(float); } +long MyTemplatedMethodClass::get_double_size() { return (long)sizeof(double); } +long MyTemplatedMethodClass::get_self_size() { return (long)sizeof(MyTemplatedMethodClass); } diff --git a/pypy/module/_cppyy/test/templates.h b/pypy/module/_cppyy/test/templates.h new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.h @@ -0,0 +1,35 @@ +//=========================================================================== +class MyTemplatedMethodClass { // template methods +public: + long get_size(); // to get around bug in genreflex + template long get_size(); + + long get_char_size(); + long get_int_size(); + long get_long_size(); + long get_float_size(); + long get_double_size(); + + long get_self_size(); + +private: + double m_data[3]; +}; + +template +inline long MyTemplatedMethodClass::get_size() { + return sizeof(B); +} + +// +typedef MyTemplatedMethodClass MyTMCTypedef_t; + +// explicit instantiation +template long MyTemplatedMethodClass::get_size(); +template long MyTemplatedMethodClass::get_size(); + +// "lying" specialization +template<> +inline long MyTemplatedMethodClass::get_size() { + return 42; +} diff --git a/pypy/module/_cppyy/test/templates.xml b/pypy/module/_cppyy/test/templates.xml new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/pypy/module/_cppyy/test/test_advancedcpp.py b/pypy/module/_cppyy/test/test_advancedcpp.py --- a/pypy/module/_cppyy/test/test_advancedcpp.py +++ b/pypy/module/_cppyy/test/test_advancedcpp.py @@ -28,9 +28,9 @@ def 
test01_default_arguments(self): """Test usage of default arguments""" - import _cppyy + import _cppyy as cppyy def test_defaulter(n, t): - defaulter = getattr(_cppyy.gbl, '%s_defaulter' % n) + defaulter = getattr(cppyy.gbl, '%s_defaulter' % n) d = defaulter() assert d.m_a == t(11) @@ -55,23 +55,23 @@ assert d.m_b == t(4) assert d.m_c == t(5) d.__destruct__() - test_defaulter('short', int) + test_defaulter('short', int) test_defaulter('ushort', int) - test_defaulter('int', int) - test_defaulter('uint', int) - test_defaulter('long', long) - test_defaulter('ulong', long) - test_defaulter('llong', long) + test_defaulter('int', int) + test_defaulter('uint', int) + test_defaulter('long', long) + test_defaulter('ulong', long) + test_defaulter('llong', long) test_defaulter('ullong', long) - test_defaulter('float', float) + test_defaulter('float', float) test_defaulter('double', float) def test02_simple_inheritance(self): """Test binding of a basic inheritance structure""" - import _cppyy - base_class = _cppyy.gbl.base_class - derived_class = _cppyy.gbl.derived_class + import _cppyy as cppyy + base_class = cppyy.gbl.base_class + derived_class = cppyy.gbl.derived_class assert issubclass(derived_class, base_class) assert not issubclass(base_class, derived_class) @@ -123,8 +123,8 @@ def test03_namespaces(self): """Test access to namespaces and inner classes""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl assert gbl.a_ns is gbl.a_ns assert gbl.a_ns.d_ns is gbl.a_ns.d_ns @@ -150,8 +150,8 @@ def test03a_namespace_lookup_on_update(self): """Test whether namespaces can be shared across dictionaries.""" - import _cppyy, ctypes - gbl = _cppyy.gbl + import _cppyy as cppyy, ctypes + gbl = cppyy.gbl lib2 = ctypes.CDLL("./advancedcpp2Dict.so", ctypes.RTLD_GLOBAL) @@ -179,8 +179,8 @@ def test04_template_types(self): """Test bindings of templated types""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl assert gbl.T1 is gbl.T1 assert gbl.T2 is gbl.T2 @@ -245,8 +245,8 @@ def test05_abstract_classes(self): """Test non-instatiatability of abstract classes""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl raises(TypeError, gbl.a_class) raises(TypeError, gbl.some_abstract_class) @@ -260,12 +260,12 @@ def test06_datamembers(self): """Test data member access when using virtual inheritence""" - import _cppyy - a_class = _cppyy.gbl.a_class - b_class = _cppyy.gbl.b_class - c_class_1 = _cppyy.gbl.c_class_1 - c_class_2 = _cppyy.gbl.c_class_2 - d_class = _cppyy.gbl.d_class + import _cppyy as cppyy + a_class = cppyy.gbl.a_class + b_class = cppyy.gbl.b_class + c_class_1 = cppyy.gbl.c_class_1 + c_class_2 = cppyy.gbl.c_class_2 + d_class = cppyy.gbl.d_class assert issubclass(b_class, a_class) assert issubclass(c_class_1, a_class) @@ -354,8 +354,8 @@ def test07_pass_by_reference(self): """Test reference passing when using virtual inheritance""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl b_class = gbl.b_class c_class = gbl.c_class_2 d_class = gbl.d_class @@ -387,71 +387,75 @@ def test08_void_pointer_passing(self): """Test passing of variants of void pointer arguments""" - import _cppyy - pointer_pass = _cppyy.gbl.pointer_pass - some_concrete_class = _cppyy.gbl.some_concrete_class + import _cppyy as cppyy + pointer_pass = cppyy.gbl.pointer_pass + some_concrete_class = cppyy.gbl.some_concrete_class pp = pointer_pass() o = some_concrete_class() - assert _cppyy.addressof(o) == pp.gime_address_ptr(o) - assert 
_cppyy.addressof(o) == pp.gime_address_ptr_ptr(o) - assert _cppyy.addressof(o) == pp.gime_address_ptr_ref(o) + assert cppyy.addressof(o) == pp.gime_address_ptr(o) + assert cppyy.addressof(o) == pp.gime_address_ptr_ptr(o) + assert cppyy.addressof(o) == pp.gime_address_ptr_ref(o) import array - addressofo = array.array('l', [_cppyy.addressof(o)]) - assert addressofo.buffer_info()[0] == pp.gime_address_ptr_ptr(addressofo) + addressofo = array.array('l', [cppyy.addressof(o)]) + assert addressofo[0] == pp.gime_address_ptr_ptr(addressofo) assert 0 == pp.gime_address_ptr(0) - assert 0 == pp.gime_address_ptr(None) + raises(TypeError, pp.gime_address_ptr, None) - ptr = _cppyy.bind_object(0, some_concrete_class) - assert _cppyy.addressof(ptr) == 0 + ptr = cppyy.bind_object(0, some_concrete_class) + assert cppyy.addressof(ptr) == 0 pp.set_address_ptr_ref(ptr) - assert _cppyy.addressof(ptr) == 0x1234 + assert cppyy.addressof(ptr) == 0x1234 pp.set_address_ptr_ptr(ptr) - assert _cppyy.addressof(ptr) == 0x4321 + assert cppyy.addressof(ptr) == 0x4321 + + assert cppyy.addressof(cppyy.nullptr) == 0 + raises(TypeError, cppyy.addressof, None) + assert cppyy.addressof(0) == 0 def test09_opaque_pointer_passing(self): """Test passing around of opaque pointers""" - import _cppyy - some_concrete_class = _cppyy.gbl.some_concrete_class + import _cppyy as cppyy + some_concrete_class = cppyy.gbl.some_concrete_class o = some_concrete_class() # TODO: figure out the PyPy equivalent of CObject (may have to do this # through the C-API from C++) - #cobj = _cppyy.as_cobject(o) - addr = _cppyy.addressof(o) + #cobj = cppyy.as_cobject(o) + addr = cppyy.addressof(o) - #assert o == _cppyy.bind_object(cobj, some_concrete_class) - #assert o == _cppyy.bind_object(cobj, type(o)) - #assert o == _cppyy.bind_object(cobj, o.__class__) - #assert o == _cppyy.bind_object(cobj, "some_concrete_class") - assert _cppyy.addressof(o) == _cppyy.addressof(_cppyy.bind_object(addr, some_concrete_class)) - assert o == _cppyy.bind_object(addr, some_concrete_class) - assert o == _cppyy.bind_object(addr, type(o)) - assert o == _cppyy.bind_object(addr, o.__class__) - assert o == _cppyy.bind_object(addr, "some_concrete_class") - raises(TypeError, _cppyy.bind_object, addr, "does_not_exist") - raises(TypeError, _cppyy.bind_object, addr, 1) + #assert o == cppyy.bind_object(cobj, some_concrete_class) + #assert o == cppyy.bind_object(cobj, type(o)) + #assert o == cppyy.bind_object(cobj, o.__class__) + #assert o == cppyy.bind_object(cobj, "some_concrete_class") + assert cppyy.addressof(o) == cppyy.addressof(cppyy.bind_object(addr, some_concrete_class)) + assert o == cppyy.bind_object(addr, some_concrete_class) + assert o == cppyy.bind_object(addr, type(o)) + assert o == cppyy.bind_object(addr, o.__class__) + assert o == cppyy.bind_object(addr, "some_concrete_class") + raises(TypeError, cppyy.bind_object, addr, "does_not_exist") + raises(TypeError, cppyy.bind_object, addr, 1) def test10_object_identity(self): """Test object identity""" - import _cppyy - some_concrete_class = _cppyy.gbl.some_concrete_class - some_class_with_data = _cppyy.gbl.some_class_with_data + import _cppyy as cppyy + some_concrete_class = cppyy.gbl.some_concrete_class + some_class_with_data = cppyy.gbl.some_class_with_data o = some_concrete_class() - addr = _cppyy.addressof(o) + addr = cppyy.addressof(o) - o2 = _cppyy.bind_object(addr, some_concrete_class) + o2 = cppyy.bind_object(addr, some_concrete_class) assert o is o2 - o3 = _cppyy.bind_object(addr, some_class_with_data) + o3 = 
cppyy.bind_object(addr, some_class_with_data) assert not o is o3 d1 = some_class_with_data() @@ -472,13 +476,13 @@ def test11_multi_methods(self): """Test calling of methods from multiple inheritance""" - import _cppyy - multi = _cppyy.gbl.multi + import _cppyy as cppyy + multi = cppyy.gbl.multi - assert _cppyy.gbl.multi1 is multi.__bases__[0] - assert _cppyy.gbl.multi2 is multi.__bases__[1] From pypy.commits at gmail.com Thu Nov 2 04:07:39 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:39 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <59fad24b.95091c0a.13a9.6c3a@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r92902:6311c6a24eba Date: 2017-11-02 10:10 +0200 http://bitbucket.org/pypy/pypy/changeset/6311c6a24eba/ Log: merge default into py3.5 diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -145,6 +145,12 @@ shutil.rmtree(str(join(basedir,'lib_pypy','__pycache__')), ignore_errors=True) + # be sure pip, setuptools are installed in a fresh pypy + # allows proper functioning of cffi on win32 with newer vc compilers + # XXX move this to a build slave step? + status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import setuptools']) + if status != 0: + status, stdout, stderr = run_subprocess(str(pypy_c), ['-m', 'ensurepip']) failures = [] for key, module in sorted(cffi_build_scripts.items()): diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -20,6 +20,7 @@ import py import fnmatch import subprocess +from pypy.tool.release.smartstrip import smartstrip USE_ZIPFILE_MODULE = sys.platform == 'win32' @@ -223,15 +224,8 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - if not options.nostrip: - for source, target in binaries: - if sys.platform == 'win32': - pass - elif sys.platform == 'darwin': - # 'strip' fun: see issue #587 for why -x - os.system("strip -x " + str(bindir.join(target))) # ignore errors - else: - os.system("strip " + str(bindir.join(target))) # ignore errors + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile @@ -297,8 +291,8 @@ help='do not build and package the %r cffi module' % (key,)) parser.add_argument('--without-cffi', dest='no_cffi', action='store_true', help='skip building *all* the cffi modules listed above') - parser.add_argument('--nostrip', dest='nostrip', action='store_true', - help='do not strip the exe, making it ~10MB larger') + parser.add_argument('--no-keep-debug', dest='keep_debug', + action='store_false', help='do not keep debug symbols') parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe, help='target executable name, defaults to "%s"' % pypy_exe) parser.add_argument('--archive-name', dest='name', type=str, default='', @@ -317,8 +311,8 @@ '(default on OS X)') options = parser.parse_args(args) - if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): - options.nostrip = True + if os.environ.has_key("PYPY_PACKAGE_NOKEEPDEBUG"): + options.keep_debug = False if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): options.no_tk = True if os.environ.has_key("PYPY_EMBED_DEPENDENCIES"): diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py new file mode 100644 --- /dev/null +++ b/pypy/tool/release/smartstrip.py @@ -0,0 +1,32 @@ +""" +Strip symbols from an 
executable, but keep them in a .debug file +""" + +import sys +import os +import py + +def _strip(exe): + if sys.platform == 'win32': + pass + elif sys.platform == 'darwin': + # 'strip' fun: see issue #587 for why -x + os.system("strip -x " + str(exe)) # ignore errors + else: + os.system("strip " + str(exe)) # ignore errors + +def _extract_debug_symbols(exe, debug): + if sys.platform == 'linux2': + os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) + os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + +def smartstrip(exe, keep_debug=True): + exe = py.path.local(exe) + debug = py.path.local(str(exe) + '.debug') + if keep_debug: + _extract_debug_symbols(exe, debug) + _strip(exe) + + +if __name__ == '__main__': + smartstrip(sys.argv[1]) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py new file mode 100644 --- /dev/null +++ b/pypy/tool/release/test/test_smartstrip.py @@ -0,0 +1,50 @@ +import pytest +import sys +import os +from commands import getoutput +from pypy.tool.release.smartstrip import smartstrip + + at pytest.fixture +def exe(tmpdir): + src = tmpdir.join("myprog.c") + src.write(""" + int foo(int a, int b) { + return a+b; + } + int main(void) { } + """) + exe = tmpdir.join("myprog") + ret = os.system("gcc -o %s %s" % (exe, src)) + assert ret == 0 + return exe + +def info_symbol(exe, symbol): + out = getoutput("gdb %s -ex 'info symbol %s' -ex 'quit'" % (exe, symbol)) + lines = out.splitlines() + return lines[-1] + + at pytest.mark.skipif(sys.platform == 'win32', + reason='strip not supported on windows') +class TestSmarStrip(object): + + def test_info_symbol(self, exe): + info = info_symbol(exe, "foo") + assert info == "foo in section .text" + + def test_strip(self, exe): + smartstrip(exe, keep_debug=False) + info = info_symbol(exe, "foo") + assert info.startswith("No symbol table is loaded") + + @pytest.mark.skipif(sys.platform != 'linux2', + reason='keep_debug not supported') + def test_keep_debug(self, exe, tmpdir): + smartstrip(exe, keep_debug=True) + debug = tmpdir.join("myprog.debug") + assert debug.check(file=True) + info = info_symbol(exe, "foo") + assert info == "foo in section .text of %s" % exe + # + debug.remove() + info = info_symbol(exe, "foo") + assert info.startswith("No symbol table is loaded") From pypy.commits at gmail.com Thu Nov 2 06:38:38 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 02 Nov 2017 03:38:38 -0700 (PDT) Subject: [pypy-commit] pypy canraise-assertionerror: branch to experiment with canraise not raising for assertion error Message-ID: <59faf5ae.048e1c0a.46413.a1dc@mx.google.com> Author: fijal Branch: canraise-assertionerror Changeset: r92903:904915e5425e Date: 2017-11-02 11:37 +0100 http://bitbucket.org/pypy/pypy/changeset/904915e5425e/ Log: branch to experiment with canraise not raising for assertion error diff --git a/rpython/translator/backendopt/canraise.py b/rpython/translator/backendopt/canraise.py --- a/rpython/translator/backendopt/canraise.py +++ b/rpython/translator/backendopt/canraise.py @@ -1,6 +1,9 @@ from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS +from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper import rclass from rpython.tool.ansi_print import AnsiLogger from rpython.translator.backendopt import graphanalyze +from rpython.flowspace import model as flowmodel log = AnsiLogger("canraise") @@ -8,6 +11,14 @@ class RaiseAnalyzer(graphanalyze.BoolGraphAnalyzer): ignore_exact_class = None + def __init__(self, translator): + 
graphanalyze.BoolGraphAnalyzer.__init__(self, translator) + ed = translator.rtyper.exceptiondata + self.ll_assert_error = ed.get_standard_ll_exc_instance_by_class( + AssertionError) + self.ll_not_impl_error = ed.get_standard_ll_exc_instance_by_class( + NotImplementedError) + def do_ignore_memory_error(self): self.ignore_exact_class = MemoryError @@ -25,6 +36,12 @@ analyze_exceptblock = None # don't call this def analyze_exceptblock_in_graph(self, graph, block, seen=None): + def producer(block, v): + for op in block.operations: + if op.result is v: + return op + assert False + if self.ignore_exact_class is not None: from rpython.translator.backendopt.ssa import DataFlowFamilyBuilder dff = DataFlowFamilyBuilder(graph) @@ -38,7 +55,34 @@ # it doesn't count. We'll see the place that really # raises the exception in the first place. return False - return True + # find all the blocks leading to the raise block + blocks = [] + for candidate in graph.iterblocks(): + if len(candidate.exits) != 1: + continue + if candidate.exits[0].target is block: + blocks.append(candidate) + ignored = 0 + import pdb + pdb.set_trace() + for preblock in blocks: + exc_val = preblock.exits[0].args[1] + if isinstance(exc_val, flowmodel.Constant): + exc = exc_val.value + else: + # find the producer + op = producer(preblock, exc_val) + if op.opname == 'cast_pointer': + exc_val = op.args[0] + op = producer(preblock, exc_val) + if op.opname != 'same_as': + # something strange, return True + return True + exc = op.args[0].value + p = lltype.cast_pointer(rclass.OBJECTPTR, exc) + if p == self.ll_assert_error or p == self.ll_not_impl_error: + ignored += 1 + return ignored < len(blocks) # backward compatible interface def can_raise(self, op, seen=None): diff --git a/rpython/translator/backendopt/test/test_canraise.py b/rpython/translator/backendopt/test/test_canraise.py --- a/rpython/translator/backendopt/test/test_canraise.py +++ b/rpython/translator/backendopt/test/test_canraise.py @@ -1,7 +1,8 @@ +from rpython.conftest import option +from rpython.rtyper.lltypesystem import rffi from rpython.translator.translator import TranslationContext, graphof from rpython.translator.backendopt.canraise import RaiseAnalyzer from rpython.translator.backendopt.all import backend_optimizations -from rpython.conftest import option class TestCanRaise(object): def translate(self, func, sig): @@ -253,3 +254,48 @@ ra.do_ignore_memory_error() # but it's potentially a KeyError result = ra.analyze_direct_call(graphof(t, h)) assert result + + def test_charp2str(self): + def f(a): + return len(rffi.charp2str(a)) + + t, ra = self.translate(f, [rffi.CCHARP]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError + + def test_calls_raise_not_impl(self): + def raising(): + raise NotImplementedError + + def not_raising(): + pass + + def f(a): + if a == 15: + raising() + else: + not_raising() + + t, ra = self.translate(f, [int]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError + + def test_calls_raise_assertion_error(self): + def raising(): + assert False + + def not_raising(): + pass + + def f(a): + if a == 15: + raising() + else: + not_raising() + + t, ra = self.translate(f, [int]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError From pypy.commits at gmail.com Thu Nov 2 07:03:14 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 02 Nov 
2017 04:03:14 -0700 (PDT) Subject: [pypy-commit] pypy default: the .debug files don't need to be executable, remove the 'x' bit Message-ID: <59fafb72.88c5df0a.9d3a1.ed36@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92904:84a682f2e00a Date: 2017-11-02 11:59 +0100 http://bitbucket.org/pypy/pypy/changeset/84a682f2e00a/ Log: the .debug files don't need to be executable, remove the 'x' bit diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py --- a/pypy/tool/release/smartstrip.py +++ b/pypy/tool/release/smartstrip.py @@ -19,6 +19,9 @@ if sys.platform == 'linux2': os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + perm = debug.stat().mode + perm &= ~(0111) # remove the 'x' bit + debug.chmod(perm) def smartstrip(exe, keep_debug=True): exe = py.path.local(exe) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py --- a/pypy/tool/release/test/test_smartstrip.py +++ b/pypy/tool/release/test/test_smartstrip.py @@ -42,6 +42,9 @@ smartstrip(exe, keep_debug=True) debug = tmpdir.join("myprog.debug") assert debug.check(file=True) + perm = debug.stat().mode & 0777 + assert perm & 0111 == 0 # 'x' bit not set + # info = info_symbol(exe, "foo") assert info == "foo in section .text of %s" % exe # From pypy.commits at gmail.com Thu Nov 2 07:03:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 02 Nov 2017 04:03:16 -0700 (PDT) Subject: [pypy-commit] pypy default: make sure to extract also the .debug files from the nightly Message-ID: <59fafb74.4f931c0a.e68c0.dd68@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92905:060286c2177e Date: 2017-11-02 12:02 +0100 http://bitbucket.org/pypy/pypy/changeset/060286c2177e/ Log: make sure to extract also the .debug files from the nightly diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): From pypy.commits at gmail.com Thu Nov 2 11:06:00 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:00 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: close branch to be merged Message-ID: <59fb3458.09a0df0a.46c79.5dd4@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92907:5d3a56b266eb Date: 2017-11-02 16:58 +0200 http://bitbucket.org/pypy/pypy/changeset/5d3a56b266eb/ Log: close branch to be merged From pypy.commits at gmail.com Thu Nov 2 11:06:02 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:02 -0700 (PDT) Subject: [pypy-commit] pypy default: merge bsd-patches which fixes failures on FreeBSD (David Naylor) Message-ID: <59fb345a.14a1df0a.783a7.1943@mx.google.com> Author: Matti Picus Branch: Changeset: r92908:959da02fe2dc Date: 2017-11-02 16:59 +0200 http://bitbucket.org/pypy/pypy/changeset/959da02fe2dc/ Log: merge bsd-patches which fixes failures on FreeBSD (David Naylor) diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = 
_ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -107,6 +108,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ 
b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + "'freebsd' in sys.platform") def test_raw_input(): import os import pty diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -244,13 +244,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? 
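(For illustration only: a minimal usage sketch of the ctypes.CDLL(..., handle=n) path merged above. The library name assumes a Linux libc and is not taken from the patch; on CPython the same keyword argument behaves the same way.)

    import ctypes

    libc = ctypes.CDLL("libc.so.6")                        # first load: dlopen() happens here
    alias = ctypes.CDLL("libc.so.6", handle=libc._handle)  # reuse the already-open handle
    assert alias.strlen(b"pypy") == 4                      # symbols still resolve normally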
standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Thu Nov 2 11:05:58 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:05:58 -0700 (PDT) Subject: [pypy-commit] pypy default: help tests find msv compiler Message-ID: <59fb3456.cc091c0a.9de42.6f6d@mx.google.com> Author: Matti Picus Branch: Changeset: r92906:2177b95b1174 Date: 2017-11-02 16:48 +0200 http://bitbucket.org/pypy/pypy/changeset/2177b95b1174/ Log: help tests find msv compiler diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) From pypy.commits at gmail.com Thu Nov 2 11:06:04 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:04 -0700 (PDT) Subject: [pypy-commit] pypy default: document merge branches Message-ID: <59fb345c.86081c0a.3b4f8.59cf@mx.google.com> Author: Matti Picus Branch: Changeset: r92909:8ee02499c7fd Date: 2017-11-02 17:08 +0200 http://bitbucket.org/pypy/pypy/changeset/8ee02499c7fd/ Log: document merge branches diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,10 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. 
branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) From pypy.commits at gmail.com Thu Nov 2 12:34:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 02 Nov 2017 09:34:24 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: Add extra_tests/requirements.txt Message-ID: <59fb4910.d08edf0a.9c5a8.0d80@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92910:c0b920761a24 Date: 2017-11-02 16:33 +0000 http://bitbucket.org/pypy/pypy/changeset/c0b920761a24/ Log: Add extra_tests/requirements.txt diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis From pypy.commits at gmail.com Thu Nov 2 13:38:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 02 Nov 2017 10:38:53 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: Add a failing test Message-ID: <59fb582d.028b1c0a.476b5.1747@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92911:1dc82bde8716 Date: 2017-11-02 17:38 +0000 http://bitbucket.org/pypy/pypy/changeset/1dc82bde8716/ Log: Add a failing test diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_failing.py @@ -0,0 +1,8 @@ +from hypothesis import given, strategies + +def mean(a, b): + return (a + b)/2. + + at given(strategies.integers(), strategies.integers()) +def test_mean_failing(a, b): + assert mean(a, b) >= min(a, b) From pypy.commits at gmail.com Fri Nov 3 09:39:01 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:39:01 -0700 (PDT) Subject: [pypy-commit] pypy matplotlib: remove array-to-obj conversion, do it in matplotlib instead Message-ID: <59fc7175.a1b6500a.cf022.23b0@mx.google.com> Author: Matti Picus Branch: matplotlib Changeset: r92913:cfbf50f45366 Date: 2017-11-03 14:19 +0200 http://bitbucket.org/pypy/pypy/changeset/cfbf50f45366/ Log: remove array-to-obj conversion, do it in matplotlib instead diff --git a/lib_pypy/_tkinter/tclobj.py b/lib_pypy/_tkinter/tclobj.py --- a/lib_pypy/_tkinter/tclobj.py +++ b/lib_pypy/_tkinter/tclobj.py @@ -2,11 +2,6 @@ from .tklib_cffi import ffi as tkffi, lib as tklib import binascii -try: - import numpy as np - hasNumpy = True -except ImportError: - hasNumpy = False class TypeCache(object): def __init__(self): @@ -91,29 +86,6 @@ finally: tklib.mp_clear(bigValue) -def AsObjNDArray(value): - # XXX there must be a better way - argv = tkffi.new("Tcl_Obj*[]", 3) - argv[0] = AsObj(' '.join([str(x) for x in value.shape])) - argv[1] = AsObj(value.dtype.str) - asstr = value.tostring() - argv[2] = AsObj(binascii.b2a_hex(asstr)) - return tklib.Tcl_NewListObj(3, argv) - -def FromTclStringNDArray(data): - # unconvert data, assuming it is stringified from AsObjNDArray - indx1 = data.find(b'}') - shape = map(int, data[1:indx1].split()) - size = np.prod(shape) - indx2 = data.find(b' ', indx1 + 2) - dtype = np.dtype(data[indx1 + 2:indx2]) - start = indx2+1 - stop = start + size * dtype.itemsize * 2 - if stop > len(data): - raise ValueError('data too short') - vals = binascii.a2b_hex(data[start:stop]) - return np.fromstring(vals, dtype=dtype).reshape(shape) - def FromObj(app, value): """Convert a TclObj pointer into a Python object.""" typeCache = app._typeCache @@ -202,9 +174,6 @@ if isinstance(value, TclObject): tklib.Tcl_IncrRefCount(value._value) return value._value - if hasNumpy and 
isinstance(value, np.ndarray): - return AsObjNDArray(value) - return AsObj(str(value)) class TclObject(object): From pypy.commits at gmail.com Fri Nov 3 09:39:03 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:39:03 -0700 (PDT) Subject: [pypy-commit] pypy default: rename uu to something more unique, maybe fixes tests? (arigato) Message-ID: <59fc7177.bbb7500a.54de0.dddc@mx.google.com> Author: Matti Picus Branch: Changeset: r92914:5c8b7f2cd6b7 Date: 2017-11-03 15:38 +0200 http://bitbucket.org/pypy/pypy/changeset/5c8b7f2cd6b7/ Log: rename uu to something more unique, maybe fixes tests? (arigato) diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -194,9 +194,9 @@ m0 = ord(self.test_pyc[0]) m0 ^= 0x04 test_pyc = chr(m0) + self.test_pyc[1:] - self.writefile("uu.pyc", test_pyc) + self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, - "__import__('uu', globals(), locals(), [])") + "__import__('xxbad_pyc', globals(), locals(), [])") assert 'uu' not in sys.modules def test_force_py(self): @@ -204,9 +204,9 @@ m0 = ord(self.test_pyc[0]) m0 ^= 0x04 test_pyc = chr(m0) + self.test_pyc[1:] - self.writefile("uu.pyc", test_pyc) - self.writefile("uu.py", "def f(x): return x") - mod = __import__("uu", globals(), locals(), []) + self.writefile("xxforce_py.pyc", test_pyc) + self.writefile("xxforce_py.py", "def f(x): return x") + mod = __import__("xxforce_py", globals(), locals(), []) assert mod.f(3) == 3 def test_sys_modules(self): From pypy.commits at gmail.com Fri Nov 3 09:38:59 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:38:59 -0700 (PDT) Subject: [pypy-commit] pypy matplotlib: merge default into branch Message-ID: <59fc7173.e184500a.38188.b627@mx.google.com> Author: Matti Picus Branch: matplotlib Changeset: r92912:112fed2c005c Date: 2017-11-02 18:27 +0200 http://bitbucket.org/pypy/pypy/changeset/112fed2c005c/ Log: merge default into branch diff too long, truncating to 2000 out of 4915 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + 
self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self 
+ for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git 
a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py
--- a/lib_pypy/_testcapi.py
+++ b/lib_pypy/_testcapi.py
@@ -16,4 +16,10 @@
     with fp:
         imp.load_module('_testcapi', fp, filename, description)
 except ImportError:
+    if os.name == 'nt':
+        # hack around finding compilers on win32
+        try:
+            import setuptools
+        except ImportError:
+            pass
     _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir)
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -182,6 +182,57 @@
 technical difficulties.


+What about numpy, numpypy, micronumpy?
+--------------------------------------
+
+Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It
+has two pieces:
+
+ * the builtin module :source:`pypy/module/micronumpy`: this is written in
+   RPython and roughly covers the content of the ``numpy.core.multiarray``
+   module. Confusingly enough, this is available in PyPy under the name
+   ``_numpypy``. It is included by default in all the official releases of
+   PyPy (but it might be dropped in the future).
+
+ * a fork_ of the official numpy repository maintained by us and informally
+   called ``numpypy``: even more confusing, the name of the repo on bitbucket
+   is ``numpy``. The main difference with the upstream numpy is that it is
+   based on the micronumpy module written in RPython, instead of
+   ``numpy.core.multiarray`` which is written in C.
+
+Moreover, it is also possible to install the upstream version of ``numpy``:
+its core is written in C and it runs on PyPy under the cpyext compatibility
+layer. This is what you get if you do ``pypy -m pip install numpy``.
+
+
+Should I install numpy or numpypy?
+-----------------------------------
+
+TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip
+install numpy``. You might also be interested in using the experimental `PyPy
+binary wheels`_ to save compilation time.
+
+The upstream ``numpy`` is written in C, and runs under the cpyext
+compatibility layer. Nowadays, cpyext is mature enough that you can simply
+use the upstream ``numpy``, since it passes 99.9% of the test suite. At the
+moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext
+is infamously slow, and thus it has worse performance compared to
+``numpypy``. However, we are actively working on improving it, as we expect to
+reach the same speed, eventually.
+
+On the other hand, ``numpypy`` is more JIT-friendly and very fast to call,
+since it is written in RPython: but it is a reimplementation, and it's hard to
+be completely compatible: over the years the project slowly matured and
+eventually it was able to call out to the LAPACK and BLAS libraries to speed
+matrix calculations, and reached around an 80% parity with the upstream
+numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is
+progressing fast, we have discontinued support for ``numpypy``.
+
+.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html
+.. _fork: https://bitbucket.org/pypy/numpy
+.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels
+
+
 Is PyPy more clever than CPython about Tail Calls?
-------------------------------------------------- diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,4 +5,15 @@ .. this is a revision shortly after release-pypy2.7-v5.9.0 .. startrev:d56dadcef996 +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,20 +66,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - 
return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, 
arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. -def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 
'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import 
interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy 
ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from 
pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more 
lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" 
@@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) 
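The interp_cppyy.py hunks around here fold the old separate `isref`/`python_owns` booleans of W_CPPClass into a single `flags` field, tested against the INSTANCE_FLAGS_* constants added near the top of the module, with the new `move()` entry point setting the r-value bit. The snippet below is only a minimal, self-contained sketch of that bit-flag idea using the same constant values; the `Instance` class and the standalone `move()` helper are hypothetical stand-ins for illustration, not the real W_CPPClass or `_cppyy.move`:

    # Illustrative sketch only: same INSTANCE_FLAGS_* values as in the diff,
    # but `Instance` is a made-up stand-in, not the real W_CPPClass.
    INSTANCE_FLAGS_PYTHON_OWNS = 0x0001
    INSTANCE_FLAGS_IS_REF      = 0x0002
    INSTANCE_FLAGS_IS_R_VALUE  = 0x0004

    class Instance(object):
        def __init__(self, isref=False, python_owns=False):
            # fold the former `isref`/`python_owns` booleans into one bit field
            self.flags = 0
            if isref:
                self.flags |= INSTANCE_FLAGS_IS_REF
            if python_owns:
                self.flags |= INSTANCE_FLAGS_PYTHON_OWNS

        @property
        def python_owns(self):
            return bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)

        @python_owns.setter
        def python_owns(self, value):
            if value:
                self.flags |= INSTANCE_FLAGS_PYTHON_OWNS
            else:
                self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS

    def move(obj):
        # same idea as _cppyy.move(): tag the instance as a C++-style rvalue
        obj.flags |= INSTANCE_FLAGS_IS_R_VALUE
        return obj

    obj = move(Instance(python_owns=True))
    assert obj.python_owns
    assert obj.flags & INSTANCE_FLAGS_IS_R_VALUE

One flag word keeps the RPython instance small while still letting converters such as InstanceMoveConverter test and clear the r-value bit in a single operation.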
@@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, - "__new__" : make_new(class_name), + "__dispatch__" : dispatch, } # insert (static) methods into the class dictionary - for name in decl.get_method_names(): - cppol = decl.get_overload(name) + for m_name in decl.get_method_names(): + cppol = decl.get_overload(m_name) if cppol.is_static(): - d_class[name] = make_static_function(name, cppol) + d_class[m_name] = make_static_function(m_name, cppol) else: - d_class[name] = make_method(name, cppol) + d_class[m_name] = make_method(m_name, cppol) # add all data members to the dictionary of the class to be created, and # static ones also to the metaclass (needed for property setters) - for name in decl.get_datamember_names(): - cppdm = decl.get_datamember(name) - d_class[name] = cppdm + for d_name in decl.get_datamember_names(): + cppdm = decl.get_datamember(d_name) + d_class[d_name] = cppdm if cppdm.is_static(): - d_meta[name] = cppdm + d_meta[d_name] = cppdm # create a metaclass to allow properties (for static data write access) metabases = [type(base) for base in bases] - metacpp = type(CPPMetaScope)(class_name+'_meta', _drop_cycles(metabases), d_meta) + metacpp = type(CPPMetaScope)(cl_name+'_meta', _drop_cycles(metabases), d_meta) # create the python-side C++ class - pycls = metacpp(class_name, _drop_cycles(bases), d_class) + pycls = metacpp(cl_name, _drop_cycles(bases), d_class) # store the class on its outer scope - setattr(scope, final_class_name, pycls) + setattr(scope, cl_name, pycls) # the call to register will add back-end specific pythonizations and thus # needs to run first, so that the generic 
pythonizations can use them @@ -192,32 +233,32 @@ return CPPTemplate(template_name, scope) -def get_pycppitem(scope, name): +def get_scoped_pycppitem(scope, name): import _cppyy - # resolve typedefs/aliases - full_name = (scope == gbl) and name or (scope.__name__+'::'+name) - true_name = _cppyy._resolve_name(full_name) - if true_name != full_name: - return get_pycppclass(true_name) + # resolve typedefs/aliases: these may cross namespaces, in which case + # the lookup must trigger the creation of all necessary scopes + scoped_name = (scope == gbl) and name or (scope.__cppname__+'::'+name) + final_scoped_name = _cppyy._resolve_name(scoped_name) + if final_scoped_name != scoped_name: + pycppitem = get_pycppitem(final_scoped_name) + # also store on the requested scope (effectively a typedef or pointer copy) + setattr(scope, name, pycppitem) + return pycppitem pycppitem = None - # classes - cppitem = _cppyy._scope_byname(true_name) + # scopes (classes and namespaces) + cppitem = _cppyy._scope_byname(final_scoped_name) if cppitem: - name = true_name - if scope != gbl: - name = true_name[len(scope.__cppname__)+2:] if cppitem.is_namespace(): pycppitem = make_cppnamespace(scope, name, cppitem) - setattr(scope, name, pycppitem) else: - pycppitem = make_cppclass(scope, name, true_name, cppitem) + pycppitem = make_cppclass(scope, name, cppitem) # templates if not cppitem: - cppitem = _cppyy._template_byname(true_name) + cppitem = _cppyy._is_template(final_scoped_name) if cppitem: pycppitem = make_cpptemplatetype(scope, name) setattr(scope, name, pycppitem) @@ -249,29 +290,6 @@ raise AttributeError("'%s' has no attribute '%s'" % (str(scope), name)) -def scope_splitter(name): - is_open_template, scope = 0, "" - for c in name: - if c == ':' and not is_open_template: - if scope: - yield scope - scope = "" - continue - elif c == '<': - is_open_template += 1 - elif c == '>': - is_open_template -= 1 - scope += c - yield scope - -def get_pycppclass(name): - # break up the name, to walk the scopes and get the class recursively - scope = gbl - for part in scope_splitter(name): - scope = getattr(scope, part) - return scope - - # pythonization by decoration (move to their own file?) 
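For context on the scope_splitter generator shown earlier in this diff: it tokenizes a fully scoped C++ name on '::' while leaving anything inside template brackets untouched, so nested scopes can be walked one component at a time. A standalone sketch of the same splitting idea, runnable outside of _cppyy (the example name below is invented for illustration):

    def scope_splitter(name):
        # yield the successive scope components of a C++ name; '::' separates
        # components, but colons inside template brackets are kept verbatim
        is_open_template, scope = 0, ""
        for c in name:
            if c == ':' and not is_open_template:
                if scope:
                    yield scope
                    scope = ""
                continue
            elif c == '<':
                is_open_template += 1
            elif c == '>':
                is_open_template -= 1
            scope += c
        yield scope

    assert list(scope_splitter('std::map<std::string,int>::iterator')) == \
           ['std', 'map<std::string,int>', 'iterator']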
def python_style_getitem(self, idx): # python-style indexing: check for size and allow indexing from the back @@ -346,8 +364,8 @@ # also the fallback on the indexed __getitem__, but that is slower) if not 'vector' in pyclass.__name__[:11] and \ ('begin' in pyclass.__dict__ and 'end' in pyclass.__dict__): - if _cppyy._scope_byname(pyclass.__name__+'::iterator') or \ - _cppyy._scope_byname(pyclass.__name__+'::const_iterator'): + if _cppyy._scope_byname(pyclass.__cppname__+'::iterator') or \ + _cppyy._scope_byname(pyclass.__cppname__+'::const_iterator'): def __iter__(self): i = self.begin() while i != self.end(): @@ -416,17 +434,21 @@ # pre-create std to allow direct importing gbl.std = make_cppnamespace(gbl, 'std', _cppyy._scope_byname('std')) + # add move cast + gbl.std.move = _cppyy.move + # install a type for enums to refer to # TODO: this is correct for C++98, not for C++11 and in general there will # be the same issue for all typedef'd builtin types setattr(gbl, 'internal_enum_type_t', int) - # install nullptr as a unique reference - setattr(gbl, 'nullptr', _cppyy._get_nullptr()) From pypy.commits at gmail.com Fri Nov 3 10:35:28 2017 From: pypy.commits at gmail.com (stian) Date: Fri, 03 Nov 2017 07:35:28 -0700 (PDT) Subject: [pypy-commit] pypy math-improvements: Speed up division slightly Message-ID: <59fc7eb0.6896df0a.2c39d.62db@mx.google.com> Author: stian Branch: math-improvements Changeset: r92915:28ef9f10c404 Date: 2017-11-03 15:34 +0100 http://bitbucket.org/pypy/pypy/changeset/28ef9f10c404/ Log: Speed up division slightly diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2168,12 +2168,13 @@ if j >= size_v: vtop = 0 else: - vtop = v.widedigit(j) - assert vtop <= wm1 - vv = (vtop << SHIFT) | v.widedigit(abs(j-1)) + vtop = v.widedigit(j) << SHIFT + #assert vtop <= wm1 + vv = vtop | v.widedigit(abs(j-1)) q = vv / wm1 - r = vv - wm1 * q - while wm2 * q > ((r << SHIFT) | v.widedigit(abs(j-2))): + r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. 
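For readers not deep in rbigint: the hunk above touches the quotient-estimation step of Knuth-style long division, where a trial digit is guessed from the top two digits of the dividend and the top digit of the (normalised) divisor, then corrected downwards at most twice. The change keeps the same estimate but obtains the remainder with a modulo instead of recomputing vv - wm1 * q, which the in-line comment above notes seems slightly faster on wide digits. A self-contained sketch of the estimate-and-correct step in plain Python -- the tiny digit width and the digit values are invented for illustration, this is not the rbigint code itself:

    SHIFT = 4   # pretend digits are base 16, just to keep the numbers readable

    def estimate_quotient_digit(v2, v1, v0, w1, w0):
        # v2, v1, v0: top three digits of the dividend prefix
        # w1, w0:     top two digits of the divisor, with w1 >= base // 2
        vv = (v2 << SHIFT) | v1        # two-digit prefix of the dividend
        q, r = divmod(vv, w1)          # trial digit and remainder in one step
        # the trial digit can overshoot by at most 2; correct it downwards
        while w0 * q > ((r << SHIFT) | v0):
            q -= 1
            r += w1
        return q

    # 0x8C5 // 0x97 is 0xE: the estimate starts at 0xF and is corrected once
    assert estimate_quotient_digit(0x8, 0xC, 0x5, 0x9, 0x7) == 0xE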
+ vj2 = v.widedigit(abs(j-2)) + while wm2 * q > ((r << SHIFT) | vj2): q -= 1 r += wm1 From pypy.commits at gmail.com Fri Nov 3 11:12:23 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 08:12:23 -0700 (PDT) Subject: [pypy-commit] pypy default: fix tests Message-ID: <59fc8757.d0e61c0a.917c0.f6d3@mx.google.com> Author: Matti Picus Branch: Changeset: r92916:503b1a72abab Date: 2017-11-03 17:11 +0200 http://bitbucket.org/pypy/pypy/changeset/503b1a72abab/ Log: fix tests diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -213,8 +213,9 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - for source, target in binaries: - smartstrip(bindir.join(target), keep_debug=options.keep_debug) + if not _fake: + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile From pypy.commits at gmail.com Fri Nov 3 11:59:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 08:59:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Merged in nanjekye/pypy/os_lockf (pull request #575) Message-ID: <59fc9279.28361c0a.a0b50.337f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92920:1214e3588b0f Date: 2017-11-03 15:59 +0000 http://bitbucket.org/pypy/pypy/changeset/1214e3588b0f/ Log: Merged in nanjekye/pypy/os_lockf (pull request #575) lockf posix attribute diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -237,9 +237,15 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if sys.platform.startswith('linux'): + interpleveldefs['lockf'] = 'interp_posix.lockf' + for _name in ['F_LOCK', 'F_TLOCK', 'F_ULOCK', 'F_TEST']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'sched_yield'): interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' - + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2469,6 +2469,19 @@ else: return space.newint(s) + at unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) +def lockf(space, fd, cmd, length): + """apply, test or remove a POSIX lock on an + open file. 
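As a usage note for the posix.lockf() binding being added in the hunk above (it mirrors CPython's os.lockf(fd, cmd, len) together with the F_LOCK/F_TLOCK/F_ULOCK/F_TEST command constants), here is a minimal sketch on a POSIX system, following the same pattern as the test_os_lockf test added in this pull request; the file name and the 4-byte lock length are arbitrary:

    import os

    fd = os.open("lockf_demo.txt", os.O_WRONLY | os.O_CREAT)
    try:
        os.write(fd, b"test")
        os.lseek(fd, 0, 0)
        os.lockf(fd, os.F_LOCK, 4)      # lock the first four bytes (blocking)
        os.lockf(fd, os.F_ULOCK, 4)     # and release them again
    finally:
        os.close(fd)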
+ """ + while True: + try: + s = rposix.lockf(fd, cmd, length) + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(s) + def sched_yield(space): """ Voluntarily relinquish the CPU""" while True: diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1337,6 +1337,17 @@ posix.close(fd) s2.close() s1.close() + + def test_os_lockf(self): + posix, os = self.posix, self.os + fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + posix.lockf(fd, posix.F_LOCK, 4) + posix.lockf(fd, posix.F_ULOCK, 4) + finally: + os.close(fd) def test_urandom(self): os = self.posix diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -816,3 +816,14 @@ if sys.platform != 'win32': rposix.sched_yield() + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Fri Nov 3 12:00:02 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:02 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: lockf posixattributes Message-ID: <59fc9282.4d051c0a.bb8f7.36d8@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92917:287c9946859b Date: 2017-10-28 22:59 +0300 http://bitbucket.org/pypy/pypy/changeset/287c9946859b/ Log: lockf posixattributes diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -237,6 +237,12 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if sys.platform.startswith('linux'): + interpleveldefs['lockf'] = 'interp_posix.lockf' + for _name in ['F_LOCK', 'F_TLOCK', 'F_ULOCK', 'F_TEST']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) 
is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2468,3 +2468,16 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(s) + + at unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) +def lockf(space, fd, cmd, length): + """apply, test or remove a POSIX lock on an + open file. + """ + while True: + try: + s = rposix.lockf(fd, cmd, length) + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(s) diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1331,6 +1331,17 @@ posix.close(fd) s2.close() s1.close() + + def test_os_lockf(self): + posix, os = self.posix, self.os + fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + posix.lockf(fd, posix.F_LOCK, 4) + posix.lockf(fd, posix.F_ULOCK, 4) + finally: + os.close(fd) def test_urandom(self): os = self.posix diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -811,3 +811,14 @@ assert isinstance(high, int) == True assert high > low + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Fri Nov 3 12:00:06 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:06 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: merge conflict Message-ID: <59fc9286.46901c0a.da0ea.e0dc@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92918:f3f07f772e02 Date: 2017-10-30 12:57 +0300 http://bitbucket.org/pypy/pypy/changeset/f3f07f772e02/ Log: merge conflict diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -75,6 +75,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ 
^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/lib-python/3/ctypes/test/test_bitfields.py b/lib-python/3/ctypes/test/test_bitfields.py --- a/lib-python/3/ctypes/test/test_bitfields.py +++ b/lib-python/3/ctypes/test/test_bitfields.py @@ -1,5 +1,5 @@ from ctypes import * -from ctypes.test import need_symbol +from ctypes.test import need_symbol, xfail import unittest import os @@ -279,6 +279,7 @@ self.assertEqual(b, b'\xef\xcd\xab\x21') @need_symbol('c_uint32') + @xfail def test_uint32_swap_big_endian(self): # Issue #23319 class Big(BigEndianStructure): diff --git a/lib-python/3/ctypes/test/test_byteswap.py b/lib-python/3/ctypes/test/test_byteswap.py --- a/lib-python/3/ctypes/test/test_byteswap.py +++ b/lib-python/3/ctypes/test/test_byteswap.py @@ -2,6 +2,7 @@ from binascii import hexlify from ctypes import * +from test.support import impl_detail def bin(s): return hexlify(memoryview(s)).decode().upper() @@ -22,6 +23,7 @@ setattr(bits, "i%s" % i, 1) dump(bits) + @impl_detail("slots are irrelevant on PyPy", pypy=False) def test_slots(self): class BigPoint(BigEndianStructure): __slots__ = () diff --git a/lib-python/3/ctypes/test/test_frombuffer.py b/lib-python/3/ctypes/test/test_frombuffer.py --- a/lib-python/3/ctypes/test/test_frombuffer.py +++ b/lib-python/3/ctypes/test/test_frombuffer.py @@ -85,7 +85,6 @@ del a gc.collect() # Should not crash - @xfail def test_from_buffer_copy(self): a = array.array("i", range(16)) x = (c_int * 16).from_buffer_copy(a) diff --git a/lib-python/3/test/test_bytes.py b/lib-python/3/test/test_bytes.py --- a/lib-python/3/test/test_bytes.py +++ b/lib-python/3/test/test_bytes.py @@ -721,9 +721,12 @@ self.assertIs(type(BytesSubclass(A())), BytesSubclass) # Test PyBytes_FromFormat() - @test.support.impl_detail("don't test cpyext here") def test_from_format(self): test.support.import_module('ctypes') + try: + from ctypes import pythonapi + except ImportError: + self.skipTest( "no pythonapi in ctypes") from ctypes import pythonapi, py_object, c_int, c_char_p PyBytes_FromFormat = pythonapi.PyBytes_FromFormat PyBytes_FromFormat.restype = py_object diff --git a/lib-python/3/test/test_unicode.py b/lib-python/3/test/test_unicode.py --- a/lib-python/3/test/test_unicode.py +++ b/lib-python/3/test/test_unicode.py @@ -2396,6 +2396,10 @@ # Test PyUnicode_FromFormat() def test_from_format(self): support.import_module('ctypes') + try: + from ctypes import pythonapi + except ImportError: + self.skipTest( "no pythonapi in ctypes") from ctypes import ( pythonapi, py_object, sizeof, c_int, c_long, c_longlong, c_ssize_t, diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,9 +8,14 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None return res - + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) subletter = getattr(res._type_, '_type_', None) if subletter == 'c': @@ -55,7 +60,7 @@ for i in range(len(val)): target[i] = val[i] if len(val) < self._length_: - target[len(val)] = '\x00' + target[len(val)] = u'\x00' res.value = property(getvalue, setvalue) res._ffishape_ = (ffiarray, res._length_) @@ -164,7 +169,7 @@ if letter == 'c': return 
b"".join(l) if letter == 'u': - return "".join(l) + return u"".join(l) return l class Array(_CData, metaclass=ArrayMeta): diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -165,6 +165,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -113,7 +113,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __bool__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -232,9 +232,6 @@ elif tp == 'u': def _setvalue(self, val): - if isinstance(val, bytes): - val = val.decode(ConvMode.encoding, ConvMode.errors) - # possible if we use 'ignore' if val: self._buffer[0] = val def _getvalue(self): @@ -243,8 +240,6 @@ elif tp == 'c': def _setvalue(self, val): - if isinstance(val, str): - val = val.encode(ConvMode.encoding, ConvMode.errors) if val: self._buffer[0] = val def _getvalue(self): diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -290,6 +290,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1080,21 +1080,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.11.1 +Version: 1.11.2 Summary: Foreign Function Interface for Python calling C code. 
Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.11.1" -__version_info__ = (1, 11, 1) +__version__ = "1.11.2" +__version_info__ = (1, 11, 2) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -238,9 +238,9 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else - return _cffi_from_c_wchar3216_t(x); + return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) @@ -254,7 +254,7 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -247,7 +247,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.11.1" + "\ncompiled with cffi version: 1.11.2" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/pypy/TODO b/pypy/TODO --- a/pypy/TODO +++ b/pypy/TODO @@ -1,18 +1,4 @@ -TODO for the python3 test suite: - -* test_memoryview - Needs bytes/str changes. Probably easy. Work for this has begun on - py3k-memoryview (by mjacob) https://bugs.pypy.org/issue1542 - -own-tests: - -* module/test_lib_pypy - These crash the buildbots (via SyntaxErrors): others were really - made to run under Python 2.x and so simply fail - -* module.cpyext.test.test_structseq test_StructSeq - structseq now subclasses tuple on py3, which breaks how - BaseCpyTypeDescr.realize allocates it +... antocuni's older TODO: @@ -20,14 +6,6 @@ * run coverage against the parser/astbuilder/astcompiler: it's probably full of dead code because the grammar changed -* re-enable strategies https://bugs.pypy.org/issue1540 : - - re-enable IntDictStrategy - - re-enable StdObjSpace.listview_str - - re-enable the kwargs dict strategy in dictmultiobject.py - - re-enable view_as_kwargs - -* unskip numpypy tests in module/test_lib_pypy/numpypy/ - * optimize W_UnicodeObject, right now it stores both an unicode and an utf8 version of the same string diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -119,7 +119,7 @@ To run untranslated tests, you need the Boehm garbage collector libgc. 
-On recent Debian and Ubuntu (like 17.04), this is the command to install +On recent Debian and Ubuntu (16.04 onwards), this is the command to install all build-time dependencies:: apt-get install gcc make libffi-dev pkg-config zlib1g-dev libbz2-dev \ @@ -127,7 +127,7 @@ tk-dev libgc-dev python-cffi \ liblzma-dev libncursesw5-dev # these two only needed on PyPy3 -On older Debian and Ubuntu (12.04 to 16.04):: +On older Debian and Ubuntu (12.04-14.04):: apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \ libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \ @@ -149,12 +149,23 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X, most of these build-time dependencies are installed alongside +On Mac OS X:: + +Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to find them you may need to run:: xcode-select --install +An exception is OpenSSL, which is no longer provided with the operating +system. It can be obtained via Homebrew (with ``$ brew install openssl``), +but it will not be available on the system path by default. The easiest +way to enable it for building pypy is to set an environment variable:: + + export PKG_CONFIG_PATH=$(brew --prefix)/opt/openssl/lib/pkgconfig + +After setting this, translation (described next) will find the OpenSSL libs +as expected. Run the translation ------------------- @@ -187,18 +198,18 @@ entire pypy interpreter. This step is currently singe threaded, and RAM hungry. As part of this step, the chain creates a large number of C code files and a Makefile to compile them in a - directory controlled by the ``PYPY_USESSION_DIR`` environment variable. + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. 2. Create an executable ``pypy-c`` by running the Makefile. This step can - utilize all possible cores on the machine. -3. Copy the needed binaries to the current directory. -4. Generate c-extension modules for any cffi-based stdlib modules. + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. The resulting executable behaves mostly like a normal Python interpreter (see :doc:`cpython_differences`), and is ready for testing, for use as a base interpreter for a new virtualenv, or for packaging into a binary suitable for installation on another machine running the same OS as the build -machine. +machine. Note that step 4 is merely done as a convenience, any of the steps may be rerun without rerunning the previous steps. @@ -255,7 +266,7 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in + commands at install time; the exact list is in :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. 
It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/test/test_whatsnew.py b/pypy/doc/test/test_whatsnew.py --- a/pypy/doc/test/test_whatsnew.py +++ b/pypy/doc/test/test_whatsnew.py @@ -89,7 +89,7 @@ startrev, documented = parse_doc(last_whatsnew) merged, branch = get_merged_branches(ROOT, startrev, '') merged.discard('default') - merged.discard('py3k') + merged.discard('py3.5') merged.discard('') not_documented = merged.difference(documented) not_merged = documented.difference(merged) @@ -100,7 +100,7 @@ print '\n'.join(not_merged) print assert not not_documented - if branch == 'py3k': + if branch == 'py3.5': assert not not_merged else: assert branch in documented, 'Please document this branch before merging: %s' % branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,4 +5,8 @@ .. this is a revision shortly after release-pypy2.7-v5.9.0 .. startrev:d56dadcef996 +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging +.. branch: docs-osx-brew-openssl + diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -4,8 +4,3 @@ .. this is the revision after release-pypy3.5-5.9 .. startrev: be41e3ac0a29 - -.. branch: multiphase - -Implement PyType_FromSpec (PEP 384) and fix issues with PEP 489 support. 
- diff --git a/pypy/interpreter/test/test_interpreter.py b/pypy/interpreter/test/test_interpreter.py --- a/pypy/interpreter/test/test_interpreter.py +++ b/pypy/interpreter/test/test_interpreter.py @@ -1,7 +1,6 @@ import py import sys from pypy.interpreter import gateway, module, error -from hypothesis import given, strategies class TestInterpreter: @@ -300,30 +299,6 @@ assert "TypeError:" in res assert "'tuple' object is not a mapping" in res - @given(strategies.lists(strategies.one_of(strategies.none(), - strategies.lists(strategies.none())))) - def test_build_map_order(self, shape): - value = [10] - def build_expr(shape): - if shape is None: - value[0] += 1 - return '0: %d' % value[0] - else: - return '**{%s}' % (', '.join( - [build_expr(shape1) for shape1 in shape]),) - - expr = build_expr(shape)[2:] - code = """ - def f(): - return %s - """ % (expr, ) - res = self.codetest(code, 'f', []) - if value[0] == 10: - expected = {} - else: - expected = {0: value[0]} - assert res == expected, "got %r for %r" % (res, expr) - def test_build_map_unpack_with_call(self): code = """ def f(a,b,c,d): @@ -348,6 +323,36 @@ assert "TypeError:" in resg4 assert "got multiple values for keyword argument 'a'" in resg4 +try: + from hypothesis import given, strategies +except ImportError: + pass +else: + class TestHypothesisInterpreter(TestInterpreter): + @given(strategies.lists(strategies.one_of(strategies.none(), + strategies.lists(strategies.none())))) + def test_build_map_order(self, shape): + value = [10] + def build_expr(shape): + if shape is None: + value[0] += 1 + return '0: %d' % value[0] + else: + return '**{%s}' % (', '.join( + [build_expr(shape1) for shape1 in shape]),) + + expr = build_expr(shape)[2:] + code = """ + def f(): + return %s + """ % (expr, ) + res = self.codetest(code, 'f', []) + if value[0] == 10: + expected = {} + else: + expected = {0: value[0]} + assert res == expected, "got %r for %r" % (res, expr) + class AppTestInterpreter: def test_trivial(self): diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -3,7 +3,7 @@ from rpython.rlib import rdynload, clibffi from rpython.rtyper.lltypesystem import rffi -VERSION = "1.11.1" +VERSION = "1.11.2" FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI try: diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1,7 +1,7 @@ # ____________________________________________________________ import sys -assert __version__ == "1.11.1", ("This test_c.py file is for testing a version" +assert __version__ == "1.11.2", ("This test_c.py file is for testing a version" " of cffi that differs from the one that we" " get from 'import _cffi_backend'") if sys.version_info < (3,): diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -762,9 +762,16 @@ assert s == b'\xe9' def test_lone_surrogates(self): - for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', - 'utf-32', 'utf-32-le', 'utf-32-be'): + encodings = ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', + 'utf-32', 'utf-32-le', 'utf-32-be') + for encoding in encodings: raises(UnicodeEncodeError, u'\ud800'.encode, encoding) + assert (u'[\udc80]'.encode(encoding, "backslashreplace") 
== + '[\\udc80]'.encode(encoding)) + assert (u'[\udc80]'.encode(encoding, "ignore") == + '[]'.encode(encoding)) + assert (u'[\udc80]'.encode(encoding, "replace") == + '[?]'.encode(encoding)) def test_charmap_encode(self): assert 'xxx'.encode('charmap') == b'xxx' diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -582,6 +582,7 @@ 'PyComplex_AsCComplex', 'PyComplex_FromCComplex', 'PyObject_AsReadBuffer', 'PyObject_AsWriteBuffer', 'PyObject_CheckReadBuffer', + 'PyBuffer_GetPointer', 'PyBuffer_ToContiguous', 'PyBuffer_FromContiguous', 'PyImport_ImportModuleLevel', diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py --- a/pypy/module/cpyext/dictobject.py +++ b/pypy/module/cpyext/dictobject.py @@ -274,7 +274,10 @@ if pos == 0: # Store the current keys in the PyDictObject. 
decref(space, py_dict.c__tmpkeys) - w_keys = space.call_method(space.w_dict, "keys", w_dict) + w_keyview = space.call_method(space.w_dict, "keys", w_dict) + # w_keys must use the object strategy in order to keep the keys alive + w_keys = space.newlist(space.listview(w_keyview)) + w_keys.switch_to_object_strategy() py_dict.c__tmpkeys = create_ref(space, w_keys) Py_IncRef(space, py_dict.c__tmpkeys) else: @@ -287,10 +290,10 @@ decref(space, py_dict.c__tmpkeys) py_dict.c__tmpkeys = lltype.nullptr(PyObject.TO) return 0 - w_key = space.listview(w_keys)[pos] + w_key = space.listview(w_keys)[pos] # fast iff w_keys uses object strat w_value = space.getitem(w_dict, w_key) if pkey: - pkey[0] = as_pyobj(space, w_key) + pkey[0] = as_pyobj(space, w_key) if pvalue: pvalue[0] = as_pyobj(space, w_value) return 1 diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -308,6 +308,31 @@ PyAPI_FUNC(int) PyObject_AsReadBuffer(PyObject *, const void **, Py_ssize_t *); PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *, void **, Py_ssize_t *); PyAPI_FUNC(int) PyObject_CheckReadBuffer(PyObject *); +PyAPI_FUNC(void *) PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices); +/* Get the memory area pointed to by the indices for the buffer given. + Note that view->ndim is the assumed size of indices +*/ + +PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view, + Py_ssize_t len, char fort); +PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf, + Py_ssize_t len, char fort); +/* Copy len bytes of data from the contiguous chunk of memory + pointed to by buf into the buffer exported by obj. Return + 0 on success and return -1 and raise a PyBuffer_Error on + error (i.e. the object does not have a buffer interface or + it is not working). + + If fort is 'F' and the object is multi-dimensional, + then the data will be copied into the array in + Fortran-style (first dimension varies the fastest). If + fort is 'C', then the data will be copied into the array + in C-style (last dimension varies the fastest). If fort + is 'A', then it does not matter and the copy will be made + in whatever way is more efficient. 
+ +*/ + #define PyObject_MALLOC PyObject_Malloc #define PyObject_REALLOC PyObject_Realloc diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -15,7 +15,7 @@ from rpython.rlib.objectmodel import keepalive_until_here from rpython.rtyper.annlowlevel import llhelper from rpython.rlib import rawrefcount, jit -from rpython.rlib.debug import fatalerror +from rpython.rlib.debug import ll_assert, fatalerror #________________________________________________________ @@ -243,6 +243,11 @@ py_obj = rawrefcount.from_obj(PyObject, w_obj) if not py_obj: py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) + # + # Try to crash here, instead of randomly, if we don't keep w_obj alive + ll_assert(py_obj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY, + "Bug in cpyext: The W_Root object was garbage-collected " + "while being converted to PyObject.") return py_obj else: return lltype.nullptr(PyObject.TO) diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py --- a/pypy/module/cpyext/slotdefs.py +++ b/pypy/module/cpyext/slotdefs.py @@ -13,7 +13,7 @@ ssizessizeargfunc, ssizeobjargproc, iternextfunc, initproc, richcmpfunc, cmpfunc, hashfunc, descrgetfunc, descrsetfunc, objobjproc, objobjargproc, getbufferproc, ssizessizeobjargproc) -from pypy.module.cpyext.pyobject import make_ref, decref, from_ref +from pypy.module.cpyext.pyobject import make_ref, from_ref, as_pyobj from pypy.module.cpyext.pyerrors import PyErr_Occurred from pypy.module.cpyext.memoryobject import fill_Py_buffer from pypy.module.cpyext.state import State @@ -90,20 +90,21 @@ args_w = space.fixedview(w_args) return generic_cpy_call(space, func_binary, w_self, args_w[0]) +def _get_ob_type(space, w_obj): + # please ensure that w_obj stays alive + ob_type = as_pyobj(space, space.type(w_obj)) + return rffi.cast(PyTypeObjectPtr, ob_type) + def wrap_binaryfunc_l(space, w_self, w_args, func): func_binary = rffi.cast(binaryfunc, func) check_num_args(space, w_args, 1) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) return generic_cpy_call(space, func_binary, w_self, args_w[0]) def wrap_binaryfunc_r(space, w_self, w_args, func): func_binary = rffi.cast(binaryfunc, func) check_num_args(space, w_args, 1) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) return generic_cpy_call(space, func_binary, args_w[0], w_self) def wrap_ternaryfunc(space, w_self, w_args, func): @@ -121,8 +122,6 @@ func_ternary = rffi.cast(ternaryfunc, func) check_num_argsv(space, w_args, 1, 2) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) arg3 = space.w_None if len(args_w) > 1: arg3 = args_w[1] @@ -314,12 +313,10 @@ def wrap_getreadbuffer(space, w_self, w_args, func): func_target = rffi.cast(readbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type + py_type = _get_ob_type(space, w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) - decref(space, py_obj) with lltype.scoped_alloc(rffi.VOIDPP.TO, 1) as ptr: index = rffi.cast(Py_ssize_t, 0) size = generic_cpy_call(space, func_target, w_self, index, ptr) @@ -332,9 +329,7 @@ def wrap_getwritebuffer(space, w_self, w_args, func): func_target = rffi.cast(readbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type - decref(space, py_obj) + py_type = _get_ob_type(space, 
w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) @@ -350,12 +345,10 @@ def wrap_getbuffer(space, w_self, w_args, func): func_target = rffi.cast(getbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type + py_type = _get_ob_type(space, w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) - decref(space, py_obj) with lltype.scoped_alloc(Py_buffer) as pybuf: _flags = 0 if space.len_w(w_args) > 0: diff --git a/pypy/module/cpyext/src/abstract.c b/pypy/module/cpyext/src/abstract.c --- a/pypy/module/cpyext/src/abstract.c +++ b/pypy/module/cpyext/src/abstract.c @@ -96,6 +96,163 @@ return 0; } +void* +PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices) +{ + char* pointer; + int i; + pointer = (char *)view->buf; + for (i = 0; i < view->ndim; i++) { + pointer += view->strides[i]*indices[i]; + if ((view->suboffsets != NULL) && (view->suboffsets[i] >= 0)) { + pointer = *((char**)pointer) + view->suboffsets[i]; + } + } + return (void*)pointer; +} + +void +_Py_add_one_to_index_F(int nd, Py_ssize_t *index, const Py_ssize_t *shape) +{ + int k; + + for (k=0; k=0; k--) { + if (index[k] < shape[k]-1) { + index[k]++; + break; + } + else { + index[k] = 0; + } + } +} + + /* view is not checked for consistency in either of these. It is + assumed that the size of the buffer is view->len in + view->len / view->itemsize elements. + */ + +int +PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) +{ + int k; + void (*addone)(int, Py_ssize_t *, const Py_ssize_t *); + Py_ssize_t *indices, elements; + char *dest, *ptr; + + if (len > view->len) { + len = view->len; + } + + if (PyBuffer_IsContiguous(view, fort)) { + /* simplest copy is all that is needed */ + memcpy(buf, view->buf, len); + return 0; + } + + /* Otherwise a more elaborate scheme is needed */ + + /* view->ndim <= 64 */ + indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); + if (indices == NULL) { + PyErr_NoMemory(); + return -1; + } + for (k=0; kndim;k++) { + indices[k] = 0; + } + + if (fort == 'F') { + addone = _Py_add_one_to_index_F; + } + else { + addone = _Py_add_one_to_index_C; + } + dest = buf; + /* XXX : This is not going to be the fastest code in the world + several optimizations are possible. 
+ */ + elements = len / view->itemsize; + while (elements--) { + ptr = PyBuffer_GetPointer(view, indices); + memcpy(dest, ptr, view->itemsize); + dest += view->itemsize; + addone(view->ndim, indices, view->shape); + } + PyMem_Free(indices); + return 0; +} + +int +PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) +{ + int k; + void (*addone)(int, Py_ssize_t *, const Py_ssize_t *); + Py_ssize_t *indices, elements; + char *src, *ptr; + + if (len > view->len) { + len = view->len; + } + + if (PyBuffer_IsContiguous(view, fort)) { + /* simplest copy is all that is needed */ + memcpy(view->buf, buf, len); + return 0; + } + + /* Otherwise a more elaborate scheme is needed */ + + /* view->ndim <= 64 */ + indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); + if (indices == NULL) { + PyErr_NoMemory(); + return -1; + } + for (k=0; kndim;k++) { + indices[k] = 0; + } + + if (fort == 'F') { + addone = _Py_add_one_to_index_F; + } + else { + addone = _Py_add_one_to_index_C; + } + src = buf; + /* XXX : This is not going to be the fastest code in the world + several optimizations are possible. + */ + elements = len / view->itemsize; + while (elements--) { + ptr = PyBuffer_GetPointer(view, indices); + memcpy(ptr, src, view->itemsize); + src += view->itemsize; + addone(view->ndim, indices, view->shape); + } + + PyMem_Free(indices); + return 0; +} + + + /* Buffer C-API for Python 3.0 */ int diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -137,7 +137,36 @@ view = PyMemoryView_GET_BUFFER(memoryview); Py_DECREF(memoryview); return PyLong_FromLong(view->len / view->itemsize); - """)]) + """), + ("test_contiguous", "METH_O", + """ + Py_buffer* view; + PyObject * memoryview; + void * buf = NULL; + int ret; + Py_ssize_t len; + memoryview = PyMemoryView_FromObject(args); + if (memoryview == NULL) + return NULL; + view = PyMemoryView_GET_BUFFER(memoryview); + Py_DECREF(memoryview); + len = view->len; + if (len == 0) + return NULL; + buf = malloc(len); + ret = PyBuffer_ToContiguous(buf, view, view->len, 'A'); + if (ret != 0) + { + free(buf); + return NULL; + } + ret = PyBuffer_FromContiguous(view, buf, view->len, 'A'); + free(buf); + if (ret != 0) + return NULL; + Py_RETURN_NONE; + """), + ]) module = self.import_module(name='buffer_test') arr = module.PyMyArray(10) ten = foo.get_len(arr) @@ -146,6 +175,7 @@ assert ten == 10 ten = foo.test_buffer(arr) assert ten == 10 + foo.test_contiguous(arr) def test_releasebuffer(self): module = self.import_extension('foo', [ diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -337,12 +337,8 @@ PyObject* name = PyBytes_FromString("mymodule"); PyObject *obj = PyType_Type.tp_alloc(&PyType_Type, 0); PyHeapTypeObject *type = (PyHeapTypeObject*)obj; - if ((type->ht_type.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0) - { - PyErr_SetString(PyExc_ValueError, - "Py_TPFLAGS_HEAPTYPE not set"); - return NULL; - } + /* this is issue #2434: logic from pybind11 */ + type->ht_type.tp_flags |= Py_TPFLAGS_HEAPTYPE; type->ht_type.tp_name = ((PyTypeObject*)args)->tp_name; PyType_Ready(&type->ht_type); ret = PyObject_SetAttrString((PyObject*)&type->ht_type, diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- 
a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -601,25 +601,15 @@ import pkg.a, imp imp.reload(pkg.a) - def test_reload_builtin(self): - import sys, imp - oldpath = sys.path - try: - del sys.settrace - except AttributeError: - pass - - imp.reload(sys) - - assert sys.path is oldpath - assert 'settrace' not in dir(sys) # at least on CPython 3.5.2 - def test_reload_builtin_doesnt_clear(self): import imp import sys sys.foobar = "baz" - imp.reload(sys) - assert sys.foobar == "baz" + try: + imp.reload(sys) + assert sys.foobar == "baz" + finally: + del sys.foobar def test_reimport_builtin_simple_case_1(self): import sys, time @@ -637,18 +627,18 @@ def test_reimport_builtin(self): import imp, sys, time - oldpath = sys.path - time.tzname = "" + old_sleep = time.sleep + time.sleep = "" del sys.modules['time'] import time as time1 assert sys.modules['time'] is time1 - assert time.tzname == "" + assert time.sleep == "" - imp.reload(time1) # don't leave a broken time.tzname behind + imp.reload(time1) # don't leave a broken time.sleep behind import time - assert time.tzname != "" + assert time.sleep is old_sleep def test_reload_infinite(self): import infinite_reload diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -243,6 +243,9 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'sched_yield'): + interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2481,3 +2481,14 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(s) + +def sched_yield(space): + """ Voluntarily relinquish the CPU""" + while True: + try: + res = rposix.sched_yield() + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(res) +>>>>>>> other diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -975,6 +975,12 @@ assert isinstance(high, int) == True assert high > low + if hasattr(rposix, 'sched_yield'): + def test_sched_yield(self): + os = self.posix + #Always suceeds on Linux + os.sched_yield() + def test_write_buffer(self): os = self.posix fd = os.open(self.path2 + 'test_write_buffer', diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -811,7 +811,7 @@ w_encoding) if space.is_none(w_namespace_separator): - namespace_separator = 0 + namespace_separator = -1 elif space.isinstance_w(w_namespace_separator, space.w_text): separator = space.text_w(w_namespace_separator) if len(separator) == 0: @@ -834,7 +834,7 @@ elif space.is_w(w_intern, space.w_None): w_intern = None - if namespace_separator: + if namespace_separator >= 0: xmlparser = XML_ParserCreateNS( encoding, rffi.cast(rffi.CHAR, namespace_separator)) diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py --- a/pypy/module/pyexpat/test/test_parser.py +++ b/pypy/module/pyexpat/test/test_parser.py @@ -58,9 +58,9 @@ 
p.CharacterDataHandler = lambda s: data.append(s) encoding = encoding_arg is None and 'utf-8' or encoding_arg - res = p.Parse("\u00f6".encode(encoding), True) + res = p.Parse(u"\u00f6".encode(encoding), True) assert res == 1 - assert data == ["\u00f6"] + assert data == [u"\u00f6"] def test_get_handler(self): import pyexpat @@ -210,6 +210,34 @@ p.ParseFile(fake_reader) assert fake_reader.read_count == 4 + def test_entities(self): + import pyexpat + parser = pyexpat.ParserCreate(None, "") + + def startElement(tag, attrs): + assert tag == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF' + assert attrs == { + 'http://www.w3.org/XML/1998/namespacebase': + 'http://www.semanticweb.org/jiba/ontologies/2017/0/test'} + parser.StartElementHandler = startElement + parser.Parse(""" + + + + + + ]> + + + + """, True) + def test_exception(self): """ diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py @@ -37,7 +37,7 @@ v = cffi.__version__.replace('+', '') p = os.path.join(parent, 'doc', 'source', 'installation.rst') content = open(p).read() - assert ("/cffi-%s.tar.gz" % v) in content + assert (" package version %s:" % v) in content def test_setup_version(): parent = os.path.dirname(os.path.dirname(cffi.__file__)) diff --git a/pypy/module/test_lib_pypy/test_sqlite3.py b/pypy/module/test_lib_pypy/test_sqlite3.py --- a/pypy/module/test_lib_pypy/test_sqlite3.py +++ b/pypy/module/test_lib_pypy/test_sqlite3.py @@ -228,6 +228,14 @@ cur.execute("create table test(a)") cur.executemany("insert into test values (?)", [[1], [2], [3]]) assert cur.lastrowid is None + # issue 2682 + cur.execute('''insert + into test + values (?) + ''', (1, )) + assert cur.lastrowid is not None + cur.execute('''insert\t into test values (?) 
''', (1, )) + assert cur.lastrowid is not None def test_authorizer_bad_value(self, con): def authorizer_cb(action, arg1, arg2, dbname, source): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -40,6 +40,7 @@ @pytest.mark.parametrize('NF1, NF2, NF3', compositions) @example(s=u'---\uafb8\u11a7---') # issue 2289 + at example(s=u'\ufacf') @settings(max_examples=1000) @given(s=st.text()) def test_composition(s, space, NF1, NF2, NF3): diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1,4 +1,7 @@ # coding: utf-8 + +from pypy.interpreter.error import OperationError + class TestW_BytesObject: def teardown_method(self, method): @@ -96,6 +99,78 @@ w_res = space.call_function(space.w_bytes, space.wrap([42])) assert space.str_w(w_res) == '*' + +try: + from hypothesis import given, strategies +except ImportError: + pass +else: + @given(u=strategies.binary(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(u, start, len1, space): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == expected + + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 + + w_index = space.call_method(w_u, 'rfind', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == rexpected + + expected = u.startswith(v, start) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.startswith(v, start, start + len1) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start, start + len1) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) + + class AppTestBytesObject: def setup_class(cls): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- import py import sys +from pypy.interpreter.error import OperationError class TestUnicodeObject: @@ -38,6 +39,55 @@ space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) 
+try: + from hypothesis import given, strategies +except ImportError: + pass +else: + @given(u=strategies.text(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(u, start, len1, space): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == expected + + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 + + w_index = space.call_method(w_u, 'rfind', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == rexpected + + class AppTestUnicodeStringStdOnly: def test_compares(self): assert type('a') != type(b'a') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -748,17 +748,6 @@ interpreted as in slice notation. """ - def decode(): - """S.decode(encoding=None, errors='strict') -> string or unicode - - Decode S using the codec registered for encoding. encoding defaults - to the default encoding. errors may be given to set a different error - handling scheme. Default is 'strict' meaning that encoding errors raise - a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' - as well as any other name registered with codecs.register_error that is - able to handle UnicodeDecodeErrors. - """ - def encode(): """S.encode(encoding=None, errors='strict') -> string or unicode diff --git a/pypy/tool/pytest/apptest.py b/pypy/tool/pytest/apptest.py --- a/pypy/tool/pytest/apptest.py +++ b/pypy/tool/pytest/apptest.py @@ -12,7 +12,7 @@ from pypy.interpreter.gateway import app2interp_temp from pypy.interpreter.error import OperationError from pypy.interpreter.function import Method -from rpython.tool import runsubprocess +from rpython.tool.runsubprocess import run_subprocess from pypy.tool.pytest import appsupport from pypy.tool.pytest.objspace import gettestobjspace from rpython.tool.udir import udir @@ -67,14 +67,10 @@ def _rename_module(name): return str(RENAMED_USEMODULES.get(name, name)) - -def run_with_python(python_, target_, usemodules, **definitions): - if python_ is None: - py.test.skip("Cannot find the default python3 interpreter to run with -A") - # we assume that the source of target_ is in utf-8. Unfortunately, we don't - # have any easy/standard way to determine from here the original encoding - # of the source file - helpers = r"""# -*- encoding: utf-8 -*- +# we assume that the source of target_ is in utf-8. 
Unfortunately, we don't +# have any easy/standard way to determine from here the original encoding +# of the source file +helpers = r"""# -*- encoding: utf-8 -*- if 1: import sys sys.path.append(%r) @@ -90,7 +86,7 @@ import os try: if isinstance(func, str): - if func.startswith((' ', os.linesep)): + if func.startswith((' ', os.linesep, '\n')): # it's probably an indented block, so we prefix if True: # to avoid SyntaxError func = "if True:\n" + func @@ -109,6 +105,10 @@ pass self = Test() """ + +def run_with_python(python_, target_, usemodules, **definitions): + if python_ is None: + py.test.skip("Cannot find the default python3 interpreter to run with -A") defs = [] for symbol, value in sorted(definitions.items()): if isinstance(value, tuple) and isinstance(value[0], py.code.Source): @@ -181,7 +181,7 @@ helper_dir = os.path.join(pypydir, 'tool', 'cpyext') env = os.environ.copy() env['PYTHONPATH'] = helper_dir - res, stdout, stderr = runsubprocess.run_subprocess( + res, stdout, stderr = run_subprocess( python_, [str(pyfile)], env=env) print pyfile.read() print >> sys.stdout, stdout diff --git a/pypy/tool/pytest/objspace.py b/pypy/tool/pytest/objspace.py --- a/pypy/tool/pytest/objspace.py +++ b/pypy/tool/pytest/objspace.py @@ -57,9 +57,6 @@ if not ok: py.test.skip("cannot runappdirect test: " "module %r required" % (modname,)) - else: - if '__pypy__' in value: - py.test.skip("no module __pypy__ on top of CPython") continue if info is None: py.test.skip("cannot runappdirect this test on top of CPython") diff --git a/rpython/doc/jit/optimizer.rst b/rpython/doc/jit/optimizer.rst --- a/rpython/doc/jit/optimizer.rst +++ b/rpython/doc/jit/optimizer.rst @@ -42,10 +42,9 @@ There are better ways to compute the sum from ``[0..100]``, but it gives a better intuition on how traces are constructed than ``sum(range(101))``. Note that the trace syntax is the one used in the test suite. It is also very -similar to traces printed at runtime by PYPYLOG_. The first line gives the input variables, the -second line is a ``label`` operation, the last one is the backwards ``jump`` operation. - -.. _PYPYLOG: logging.html +similar to traces printed at runtime by :doc:`PYPYLOG <../logging>`. The first +line gives the input variables, the second line is a ``label`` operation, the +last one is the backwards ``jump`` operation. These instructions mentioned earlier are special: diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -305,6 +305,10 @@ # Transform into INT_ADD. The following guard will be killed # by optimize_GUARD_NO_OVERFLOW; if we see instead an # optimize_GUARD_OVERFLOW, then InvalidLoop. + + # NB: this case also takes care of int_add_ovf with 0 as on of the + # arguments: the result will be bounded, and then the optimization + # for int_add with 0 as argument will remove the op. 
op = self.replace_op_with(op, rop.INT_ADD) return self.emit(op) @@ -325,6 +329,7 @@ return None resbound = b0.sub_bound(b1) if resbound.bounded(): + # this case takes care of int_sub_ovf(x, 0) as well op = self.replace_op_with(op, rop.INT_SUB) return self.emit(op) @@ -342,6 +347,7 @@ b2 = self.getintbound(op.getarg(1)) resbound = b1.mul_bound(b2) if resbound.bounded(): + # this case also takes care of multiplication with 0 and 1 op = self.replace_op_with(op, rop.INT_MUL) return self.emit(op) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1962,6 +1962,55 @@ """ self.optimize_loop(ops, expected) + ops = """ + [i0] + i1 = int_mul_ovf(0, i0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(i0, 0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(1, i0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(i0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(i0, 1) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(i0) + """ + self.optimize_loop(ops, expected) + + def test_fold_constant_partial_ops_float(self): ops = """ [f0] diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1863,6 +1863,8 @@ rffi.INT, save_err=rffi.RFFI_FULL_ERRNO_ZERO) c_sched_get_priority_min = external('sched_get_priority_min', [rffi.INT], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) + if not _WIN32: + c_sched_yield = external('sched_yield', [], rffi.INT) @enforceargs(int) def sched_get_priority_max(policy): @@ -1872,9 +1874,9 @@ def sched_get_priority_min(policy): return handle_posix_error('sched_get_priority_min', c_sched_get_priority_min(policy)) - - - + def sched_yield(): + return handle_posix_error('sched_yield', c_sched_yield()) + #___________________________________________________________________ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -877,32 +877,31 @@ ch = ord(s[pos]) pos += 1 ch2 = 0 - if 0xD800 <= ch < 0xDC00: - if not allow_surrogates: - ru, rs, pos = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - if rs is not None: - # py3k only - if len(rs) % 4 != 0: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - result.append(rs) - continue - for ch in ru: - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) - else: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + result.append(rs) continue - elif MAXUNICODE < 65536 and pos < size: - ch2 = ord(s[pos]) - if 0xDC00 <= ch2 < 0xE000: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; - pos += 1 + for ch 
in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; + pos += 1 _STORECHAR32(result, ch, byteorder) return result.build() diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -810,6 +810,11 @@ assert isinstance(low, int) == True assert isinstance(high, int) == True assert high > low + + at rposix_requires('sched_yield') +def test_sched_yield(): + if sys.platform != 'win32': + rposix.sched_yield() @rposix_requires('lockf') def test_os_lockf(): diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -2,6 +2,7 @@ import py import sys, random +import struct from rpython.rlib import runicode from hypothesis import given, settings, strategies @@ -266,11 +267,12 @@ assert replace_with(u'rep', None) == '\x00<\x00r\x00e\x00p\x00>' assert replace_with(None, '\xca\xfe') == '\x00<\xca\xfe\x00>' - def test_utf32_surrogates(self): + @py.test.mark.parametrize('unich',[u"\ud800", u"\udc80"]) + def test_utf32_surrogates(self, unich): assert runicode.unicode_encode_utf_32_be( - u"\ud800", 1, None) == '\x00\x00\xd8\x00' + unich, 1, None) == struct.pack('>i', ord(unich)) py.test.raises(UnicodeEncodeError, runicode.unicode_encode_utf_32_be, - u"\ud800", 1, None, allow_surrogates=False) + unich, 1, None, allow_surrogates=False) def replace_with(ru, rs): def errorhandler(errors, enc, msg, u, startingpos, endingpos): if errors == 'strict': @@ -278,7 +280,7 @@ endingpos, msg) return ru, rs, endingpos return runicode.unicode_encode_utf_32_be( - u"<\ud800>", 3, None, + u"<%s>" % unich, 3, None, errorhandler, allow_surrogates=False) assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>' @@ -432,7 +434,7 @@ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'ignore', final=True) == (u'aaaabbbb', len(seq) + 8)) assert (self.decoder(seq, len(seq), 'custom', final=True, - errorhandler=self.custom_replace) == + errorhandler=self.custom_replace) == (FOO * len(seq), len(seq))) assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'custom', final=True, errorhandler=self.custom_replace) == @@ -628,7 +630,7 @@ msg='invalid continuation byte') assert self.decoder(seq, len(seq), 'replace', final=True ) == (res, len(seq)) - assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'replace', final=True) == (u'aaaa' + res + u'bbbb', len(seq) + 8)) res = res.replace(FFFD, u'') From pypy.commits at gmail.com Fri Nov 3 12:00:08 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:08 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: left over merge conflict Message-ID: <59fc9288.08921c0a.e8509.669f@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92919:3cb34f15cb82 Date: 2017-10-30 13:09 +0300 http://bitbucket.org/pypy/pypy/changeset/3cb34f15cb82/ Log: left over merge conflict diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ 
b/pypy/module/posix/interp_posix.py @@ -2491,4 +2491,3 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(res) ->>>>>>> other From pypy.commits at gmail.com Fri Nov 3 12:46:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:46:26 -0700 (PDT) Subject: [pypy-commit] buildbot default: Run the extra_tests/ tests in a virtualenv in translated test runs. Message-ID: <59fc9d62.089e1c0a.d6780.7ea3@mx.google.com> Author: Ronan Lamy Branch: Changeset: r1037:0ba20064633a Date: 2017-11-02 16:45 +0000 http://bitbucket.org/pypy/buildbot/changeset/0ba20064633a/ Log: Run the extra_tests/ tests in a virtualenv in translated test runs. diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -282,7 +282,7 @@ # a good idea, passing a "--rev" argument here changes the order of # the checkouts. Then our revisions "12345:432bcbb1ba" are bogus. def _my_pullUpdate(self, res): - command = ['pull' , self.repourl] + command = ['pull', self.repourl] #if self.revision: # command.extend(['--rev', self.revision]) d = self._dovccmd(command) @@ -447,7 +447,7 @@ )) if app_tests: - if app_tests == True: + if app_tests is True: app_tests = [] factory.addStep(PytestCmd( description="app-level (-A) test", @@ -457,6 +457,28 @@ timeout=4000, env={"TMPDIR": Interpolate('%(prop:target_tmpdir)s' + pytest), })) + test_interpreter = '../build/pypy/goal/pypy-c' + factory.addStep(ShellCmd( + description="Create virtualenv", + command=prefix + ['virtualenv', '--clear', '-p', test_interpreter, + 'pypy-venv'], + workdir='venv', + flunkOnFailure=True)) + if platform == 'win32': + virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' + else: + virt_pypy = '../venv/pypy-venv/bin/python' + factory.addStep(ShellCmd( + description="Install extra tests requirements", + command=prefix + [virt_pypy, '-m', 'pip', 'install', + '-r', '../build/extra_tests/requirements.txt'], + workdir='testing')) + factory.addStep(PytestCmd( + description="Run extra tests", + command=prefix + [virt_pypy, '-m', 'pytest', + '../build/extra_tests', '--resultlog=extra.log'], + logfiles={'pytestLog': 'extra.log'}, + workdir='testing')) if lib_python: factory.addStep(PytestCmd( From pypy.commits at gmail.com Fri Nov 3 12:51:48 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:51:48 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: document branch Message-ID: <59fc9ea4.88c5df0a.9d3a1.be03@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92921:f81b135f1265 Date: 2017-11-03 16:49 +0000 http://bitbucket.org/pypy/pypy/changeset/f81b135f1265/ Log: document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -17,3 +17,6 @@ .. branch: bsd-patches Fix failures on FreeBSD, contributed by David Naylor as patches on the issue tracker (issues 2694, 2695, 2696, 2697) + +.. 
branch: run-extra-tests +Run extra_tests/ in buildbot From pypy.commits at gmail.com Fri Nov 3 12:51:50 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:51:50 -0700 (PDT) Subject: [pypy-commit] pypy default: merge branch run-extra-tests Message-ID: <59fc9ea6.177c1c0a.b5fcf.9ae6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92922:a88ed18e1a6a Date: 2017-11-03 16:51 +0000 http://bitbucket.org/pypy/pypy/changeset/a88ed18e1a6a/ Log: merge branch run-extra-tests diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_failing.py @@ -0,0 +1,8 @@ +from hypothesis import given, strategies + +def mean(a, b): + return (a + b)/2. + + at given(strategies.integers(), strategies.integers()) +def test_mean_failing(a, b): + assert mean(a, b) >= min(a, b) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -17,3 +17,6 @@ .. branch: bsd-patches Fix failures on FreeBSD, contributed by David Naylor as patches on the issue tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot From pypy.commits at gmail.com Fri Nov 3 15:13:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 12:13:39 -0700 (PDT) Subject: [pypy-commit] pypy default: Kill fake test Message-ID: <59fcbfe3.69a8df0a.fef90.b31d@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92923:d1cd247b10f6 Date: 2017-11-03 19:13 +0000 http://bitbucket.org/pypy/pypy/changeset/d1cd247b10f6/ Log: Kill fake test diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py deleted file mode 100644 --- a/extra_tests/test_failing.py +++ /dev/null @@ -1,8 +0,0 @@ -from hypothesis import given, strategies - -def mean(a, b): - return (a + b)/2. - - at given(strategies.integers(), strategies.integers()) -def test_mean_failing(a, b): - assert mean(a, b) >= min(a, b) From pypy.commits at gmail.com Sat Nov 4 07:10:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:07 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: a branch where to update the code to vmprof 0.4.10 Message-ID: <59fda00f.6293df0a.4e1d5.0bdb@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92924:4d73e43ae3fb Date: 2017-11-04 11:15 +0100 http://bitbucket.org/pypy/pypy/changeset/4d73e43ae3fb/ Log: a branch where to update the code to vmprof 0.4.10 From pypy.commits at gmail.com Sat Nov 4 07:10:11 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:11 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: I claim that tests should NEVER fail silently; I think that test_native actually fails on linux, but the builtbot never noticed because vmprof is not installed. Probably this test will start failing because of missing vmprof, we'll think of a fix later Message-ID: <59fda013.0a0b1c0a.300cd.d940@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92926:526d9b94882a Date: 2017-11-04 12:09 +0100 http://bitbucket.org/pypy/pypy/changeset/526d9b94882a/ Log: I claim that tests should NEVER fail silently; I think that test_native actually fails on linux, but the builtbot never noticed because vmprof is not installed. 
Probably this test will start failing because of missing vmprof, we'll think of a fix later diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -141,10 +141,6 @@ fn = compile(f, [], gcpolicy="minimark") assert fn() == 0 try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: check_profile(tmpfilename) finally: assert os.path.exists(tmpfilename) @@ -231,10 +227,6 @@ fn = compile(f, [], gcpolicy="incminimark", lldebug=True) assert fn() == 0 try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: check_profile(tmpfilename) finally: assert os.path.exists(tmpfilename) From pypy.commits at gmail.com Sat Nov 4 07:10:09 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:09 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: bah, I think that this test did not actually test anything because on buildbot the cwd was different that the test expects, and thus os.walk returned an empty list O_o. Make it more robust, and actually check all files instead of stopping at the first one Message-ID: <59fda011.1cbf1c0a.fbe1b.7909@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92925:0317d4f69638 Date: 2017-11-04 12:03 +0100 http://bitbucket.org/pypy/pypy/changeset/0317d4f69638/ Log: bah, I think that this test did not actually test anything because on buildbot the cwd was different that the test expects, and thus os.walk returned an empty list O_o. Make it more robust, and actually check all files instead of stopping at the first one diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,6 +2,7 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( @@ -10,17 +11,26 @@ def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = shared.listdir('*.[ch]') + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + url = github_raw_file("vmprof/vmprof-python", "src/%s" % file.basename) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") From pypy.commits at gmail.com Sat Nov 4 14:08:04 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 04 Nov 2017 11:08:04 -0700 (PDT) Subject: [pypy-commit] pypy default: graft parts of 287c9946859b that provide rposix.lockf in rpython Message-ID: <59fe0204.90b2df0a.de390.f0dc@mx.google.com> Author: Matti Picus Branch: Changeset: r92928:9d22ff3be2ae Date: 2017-11-04 20:07 +0200 http://bitbucket.org/pypy/pypy/changeset/9d22ff3be2ae/ Log: graft parts of 287c9946859b that provide rposix.lockf in rpython diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -816,3 +816,14 @@ if sys.platform != 'win32': rposix.sched_yield() + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Sat Nov 4 14:19:22 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 04 Nov 2017 11:19:22 -0700 (PDT) Subject: [pypy-commit] pypy math-improvements: Make rshift invert (in most cases) in place, this makes a huge speedup for rshift with negative numbers as it avoids two extra copies, also make an rqshift for the power of twos Message-ID: <59fe04aa.4a9fdf0a.b3328.8a76@mx.google.com> Author: stian Branch: math-improvements Changeset: r92929:f30c2f38b0b5 Date: 2017-11-04 19:18 +0100 http://bitbucket.org/pypy/pypy/changeset/f30c2f38b0b5/ Log: Make rshift invert (in most cases) in place, this makes a huge speedup for rshift with negative numbers as it avoids two extra copies, also make an rqshift for the power of twos 
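A note for readers of the change that follows: the sketch below is an editorial illustration in plain Python, not the rbigint code itself, and the helper names are invented for the example. It shows the arithmetic identity the old rshift() relied on for negative values (invert, shift, invert again, which materializes two extra bignum copies) and an equivalent formulation that folds the +/-1 adjustment into the shift, which is roughly what the in-place version in the diff aims for.

    # Illustration only (plain Python ints, not the rbigint digit layout).
    # For any integer a and shift n >= 0, an arithmetic right shift of a
    # negative number satisfies:
    #
    #     a >> n == ~((~a) >> n)
    #
    # so it can be computed either through two inverted temporaries, or by
    # adjusting the magnitude directly: |a >> n| == ((|a| - 1) >> n) + 1.

    def rshift_via_invert(a, n):
        # the old strategy: builds ~a and then inverts the shifted result
        return ~((~a) >> n)

    def rshift_folded(a, n):
        # equivalent result for a < 0 without materializing ~a
        assert a < 0 and n >= 0
        return -((((-a) - 1) >> n) + 1)

    for a in (-1, -7, -12345678901234567890):
        for n in (0, 1, 5, 64):
            assert rshift_via_invert(a, n) == (a >> n) == rshift_folded(a, n)
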
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -787,7 +787,7 @@ if digit == 1: return rbigint(self._digits[:self.numdigits()], 1, self.numdigits()) elif digit and digit & (digit - 1) == 0: - return self.rshift(ptwotable[digit]) + return self.rqshift(ptwotable[digit]) div, mod = _divrem(self, other) if mod.sign * other.sign == -1: @@ -816,7 +816,7 @@ if digit == 1: return self elif digit & (digit - 1) == 0: - return self.rshift(ptwotable[digit]) + return self.rqshift(ptwotable[digit]) div, mod = _divrem1(self, digit) @@ -1267,31 +1267,85 @@ raise ValueError("negative shift count") elif int_other == 0: return self + invert = False if self.sign == -1 and not dont_invert: - a = self.invert().rshift(int_other) - return a.invert() + first = self.digit(0) + if first == 0: + a = self.invert().rshift(int_other) + return a.invert() + invert = True wordshift = int_other / SHIFT + loshift = int_other % SHIFT newsize = self.numdigits() - wordshift if newsize <= 0: - return NULLRBIGINT - - loshift = int_other % SHIFT + if invert: + return ONENEGATIVERBIGINT + else: + return NULLRBIGINT + + hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 while i < newsize: - newdigit = (self.udigit(wordshift) >> loshift) + digit = self.udigit(wordshift) + if i == 0 and invert and wordshift == 0: + digit -= 1 + newdigit = (digit >> loshift) if i+1 < newsize: newdigit |= (self.udigit(wordshift+1) << hishift) z.setdigit(i, newdigit) i += 1 wordshift += 1 + if invert: + z.setdigit(0, z.digit(0)+1) z._normalize() return z rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. @jit.elidable + def rqshift(self, int_other): + wordshift = int_other / SHIFT + loshift = int_other % SHIFT + newsize = self.numdigits() - wordshift + + invert = False + if self.sign == -1: + first = self.digit(0) + if first == 0: + a = self.invert().rqshift(int_other) + return a.invert() + invert = True + + if newsize <= 0: + if invert: + return ONENEGATIVERBIGINT + else: + return NULLRBIGINT + + + hishift = SHIFT - loshift + z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) + i = 0 + inverted = False + while i < newsize: + digit = self.udigit(wordshift) + if invert and i == 0 and wordshift == 0: + digit -= 1 + newdigit = (digit >> loshift) + if i+1 < newsize: + newdigit |= (self.udigit(wordshift+1) << hishift) + z.setdigit(i, newdigit) + i += 1 + wordshift += 1 + if invert: + z.setdigit(0, z.digit(0)+1) + z._normalize() + return z + rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. 
+ + @jit.elidable def abs_rshift_and_mask(self, bigshiftcount, mask): assert isinstance(bigshiftcount, r_ulonglong) assert mask >= 0 diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -598,6 +598,33 @@ res3 = f1.abs_rshift_and_mask(r_ulonglong(y), mask) assert res3 == (abs(x) >> y) & mask + def test_qshift(self): + for x in range(10): + for y in range(1, 161, 16): + num = (x << y) + x + f1 = rbigint.fromlong(num) + nf1 = rbigint.fromlong(-num) + + for z in range(1, 31): + res1 = f1.lqshift(z).tolong() + res2 = f1.rqshift(z).tolong() + res3 = nf1.lqshift(z).tolong() + res4 = nf1.rqshift(z).tolong() + + assert res1 == num << z + assert res2 == num >> z + assert res3 == -num << z + assert res4 == -num >> z + + # Large digit + for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): + f1 = rbigint.fromlong(x) + nf1 = rbigint.fromlong(-x) + assert f1.rqshift(SHIFT).tolong() == x >> SHIFT + assert nf1.rqshift(SHIFT).tolong() == -x >> SHIFT + assert f1.rqshift(SHIFT+1).tolong() == x >> (SHIFT+1) + assert nf1.rqshift(SHIFT+1).tolong() == -x >> (SHIFT+1) + def test_from_list_n_bits(self): for x in ([3L ** 30L, 5L ** 20L, 7 ** 300] + [1L << i for i in range(130)] + From pypy.commits at gmail.com Sat Nov 4 13:32:37 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 04 Nov 2017 10:32:37 -0700 (PDT) Subject: [pypy-commit] pypy default: whoops in 5c8b7f2cd6b7 Message-ID: <59fdf9b5.90051c0a.ada36.e348@mx.google.com> Author: Matti Picus Branch: Changeset: r92927:1ea57a8b4a91 Date: 2017-11-04 19:31 +0200 http://bitbucket.org/pypy/pypy/changeset/1ea57a8b4a91/ Log: whoops in 5c8b7f2cd6b7 diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -197,7 +197,7 @@ self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, "__import__('xxbad_pyc', globals(), locals(), [])") - assert 'uu' not in sys.modules + assert 'xxbad_pyc' not in sys.modules def test_force_py(self): import sys From pypy.commits at gmail.com Sat Nov 4 16:28:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 13:28:39 -0700 (PDT) Subject: [pypy-commit] pypy default: Add extra-tests for string methods, matching the interp-level tests added in 88bed3bb8ad4 Message-ID: <59fe22f7.4d051c0a.bb8f7.65bd@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92930:b97f900404e5 Date: 2017-11-04 20:28 +0000 http://bitbucket.org/pypy/pypy/changeset/b97f900404e5/ Log: Add extra-tests for string methods, matching the interp-level tests added in 88bed3bb8ad4 diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,82 @@ +from hypothesis import strategies as st +from hypothesis import given, example + + at given(st.binary(), st.binary(), st.binary()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= 
len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.binary(), st.binary()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.binary(), st.binary()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 
0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected From pypy.commits at gmail.com Sat Nov 4 17:14:29 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 14:14:29 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <59fe2db5.4d051c0a.bb8f7.6ffa@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92931:a433b30d93a4 Date: 2017-11-04 21:06 +0000 http://bitbucket.org/pypy/pypy/changeset/a433b30d93a4/ Log: hg merge default diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,82 @@ +from hypothesis import strategies as st +from hypothesis import given, example + + at given(st.binary(), st.binary(), st.binary()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert 
s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.binary(), st.binary()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.binary(), st.binary()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at 
example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -17,6 +17,12 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,13 @@ .. 
branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy3' '*/bin/libpypy3-c.so'" + binfiles = "'*/bin/pypy3*' '*/bin/libpypy3-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -110,6 +111,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 
'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + "'freebsd' in sys.platform") def test_raw_input(): import os import pty diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -196,19 +196,19 @@ m0 = self.get_pyc()[0] m0 ^= 0x04 test_pyc = bytes([m0]) + self.get_pyc()[1:] - self.writefile("uu.pyc", test_pyc) + self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, - "__import__('uu', globals(), locals(), [])") - assert 'uu' not in sys.modules + "__import__('xxbad_pyc', globals(), locals(), [])") + assert 'xxbad_pyc' not in sys.modules def test_force_py(self): import sys m0 = self.get_pyc()[0] m0 ^= 0x04 test_pyc = bytes([m0]) + self.get_pyc()[1:] - self.writefile("uu.pyc", test_pyc) - self.writefile("uu.py", "def f(x): return x") - mod = __import__("uu", globals(), locals(), []) + self.writefile("xxforce_py.pyc", test_pyc) + self.writefile("xxforce_py.py", "def f(x): return x") + mod = __import__("xxforce_py", globals(), locals(), []) assert mod.f(3) == 3 def test_sys_modules(self): diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -246,13 +246,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -224,8 +224,9 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - for source, target in binaries: - smartstrip(bindir.join(target), keep_debug=options.keep_debug) + if not _fake: + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py --- a/pypy/tool/release/smartstrip.py +++ b/pypy/tool/release/smartstrip.py @@ -19,6 +19,9 @@ if sys.platform == 'linux2': os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + perm = debug.stat().mode + perm 
&= ~(0111) # remove the 'x' bit + debug.chmod(perm) def smartstrip(exe, keep_debug=True): exe = py.path.local(exe) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py --- a/pypy/tool/release/test/test_smartstrip.py +++ b/pypy/tool/release/test/test_smartstrip.py @@ -42,6 +42,9 @@ smartstrip(exe, keep_debug=True) debug = tmpdir.join("myprog.debug") assert debug.check(file=True) + perm = debug.stat().mode & 0777 + assert perm & 0111 == 0 # 'x' bit not set + # info = info_symbol(exe, "foo") assert info == "foo in section .text of %s" % exe # diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? 
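    # A minimal sketch, not part of the patch, of what handle= buys us on the
    # line above (FreeBSD host assumed): ctypes.CDLL(..., handle=h) wraps an
    # already-open dlopen handle instead of loading a new library, so with the
    # pseudo-handle RTLD_DEFAULT (-2) every lookup becomes a dlsym() on the
    # default, global namespace:
    #
    #     lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT)
    #     dlopen_ptr = lib['dlopen']   # ~ dlsym(RTLD_DEFAULT, "dlopen")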
standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Sat Nov 4 17:14:31 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 14:14:31 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Kill tests involving dodgy comparisons with CPython 2 and superseded by b97f900404e5 Message-ID: <59fe2db7.4fc7df0a.3c6f8.883d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92932:186f4b89a84a Date: 2017-11-04 21:13 +0000 http://bitbucket.org/pypy/pypy/changeset/186f4b89a84a/ Log: Kill tests involving dodgy comparisons with CPython 2 and superseded by b97f900404e5 diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -100,77 +100,6 @@ assert space.str_w(w_res) == '*' -try: - from hypothesis import given, strategies -except ImportError: - pass -else: - @given(u=strategies.binary(), - start=strategies.integers(min_value=0, max_value=10), - len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - w_u = space.wrap(u) - w_v = space.wrap(v) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 - - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected - - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 - - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected - - expected = u.startswith(v, start) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) - - expected = u.startswith(v, start, start + len1) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) - - expected = u.endswith(v, start) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) - - expected = u.endswith(v, start, start + len1) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) - - class AppTestBytesObject: def setup_class(cls): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -39,55 +39,6 @@ 
space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) -try: - from hypothesis import given, strategies -except ImportError: - pass -else: - @given(u=strategies.text(), - start=strategies.integers(min_value=0, max_value=10), - len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - w_u = space.wrap(u) - w_v = space.wrap(v) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 - - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected - - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 - - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected - - class AppTestUnicodeStringStdOnly: def test_compares(self): assert type('a') != type(b'a') From pypy.commits at gmail.com Sat Nov 4 18:16:51 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:51 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: * Return a flag from check_utf8. Message-ID: <59fe3c53.3bb0df0a.1515b.2ac3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92933:a6e6ba074a22 Date: 2017-11-04 10:31 +0100 http://bitbucket.org/pypy/pypy/changeset/a6e6ba074a22/ Log: * Return a flag from check_utf8. * Improve the tests and run it for more examples diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -194,14 +194,14 @@ self.pos = pos def check_ascii(s): - res = _check_ascii(s) + res = first_non_ascii_char(s) if res < 0: return raise CheckError(res) @jit.elidable -def _check_ascii(s): +def first_non_ascii_char(s): for i in range(len(s)): if ord(s[i]) > 0x7F: return i @@ -286,6 +286,9 @@ _invalid_byte_3_of_4 = _invalid_cont_byte _invalid_byte_4_of_4 = _invalid_cont_byte +def _surrogate_bytes(ch1, ch2): + return ch1 == 0xed and ch2 > 0x9f + @enforceargs(allow_surrogates=bool) def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): return (ordch2>>6 != 0x2 or # 0b10 @@ -301,20 +304,22 @@ def check_utf8(s, allow_surrogates, start=0, stop=-1): """Check that 's' is a utf-8-encoded byte string. - Returns the length (number of chars) or raise CheckError. + + Returns the length (number of chars) and flags or raise CheckError. If allow_surrogates is False, then also raise if we see any. Note also codepoints_in_utf8(), which also computes the length faster by assuming that 's' is valid utf-8. 
""" - res = _check_utf8(s, allow_surrogates, start, stop) + res, flags = _check_utf8(s, allow_surrogates, start, stop) if res >= 0: - return res + return res, flags raise CheckError(~res) @jit.elidable def _check_utf8(s, allow_surrogates, start, stop): pos = start continuation_bytes = 0 + flag = FLAG_ASCII if stop < 0: end = len(s) else: @@ -326,38 +331,44 @@ if ordch1 <= 0x7F: continue + if flag == FLAG_ASCII: + flag = FLAG_REGULAR + if ordch1 <= 0xC1: - return ~(pos - 1) + return ~(pos - 1), 0 if ordch1 <= 0xDF: if pos >= end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) pos += 1 if _invalid_byte_2_of_2(ordch2): - return ~(pos - 2) + return ~(pos - 2), 0 # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz continuation_bytes += 1 continue if ordch1 <= 0xEF: if (pos + 2) > end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) pos += 2 if (_invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates) or _invalid_byte_3_of_3(ordch3)): - return ~(pos - 3) + return ~(pos - 3), 0 + + if allow_surrogates and _surrogate_bytes(ordch1, ordch2): + flag = FLAG_HAS_SURROGATES # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz continuation_bytes += 2 continue if ordch1 <= 0xF4: if (pos + 3) > end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) ordch4 = ord(s[pos + 2]) @@ -366,16 +377,16 @@ if (_invalid_byte_2_of_4(ordch1, ordch2) or _invalid_byte_3_of_4(ordch3) or _invalid_byte_4_of_4(ordch4)): - return ~(pos - 4) + return ~(pos - 4), 0 # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz continuation_bytes += 3 continue - return ~(pos - 1) + return ~(pos - 1), 0 assert pos == end assert pos - continuation_bytes >= 0 - return pos - continuation_bytes + return pos - continuation_bytes, flag @jit.elidable def codepoints_in_utf8(value, start=0, end=sys.maxint): @@ -408,9 +419,16 @@ UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct( 'utf8_loc', ('baseindex', lltype.Signed), + ('flag', lltype.Signed), ('ofs', lltype.FixedSizeArray(lltype.Char, 16)) )) +FLAG_REGULAR = 0 +FLAG_HAS_SURROGATES = 1 +FLAG_ASCII = 2 +# note that we never need index storage if we're pure ascii, but it's useful +# for passing into W_UnicodeObject.__init__ + ASCII_INDEX_STORAGE_BLOCKS = 5 ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, ASCII_INDEX_STORAGE_BLOCKS, @@ -423,6 +441,9 @@ def null_storage(): return lltype.nullptr(UTF8_INDEX_STORAGE) +UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) +UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) + def create_utf8_index_storage(utf8, utf8len): """ Create an index storage which stores index of each 4th character in utf8 encoded unicode string. 
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -28,6 +28,7 @@ else: assert not raised + at settings(max_examples=10000) @given(strategies.binary(), strategies.booleans()) def test_check_utf8(s, allow_surrogates): _test_check_utf8(s, allow_surrogates) @@ -37,19 +38,32 @@ _test_check_utf8(u.encode('utf-8'), allow_surrogates) def _test_check_utf8(s, allow_surrogates): + def _has_surrogates(s): + for u in s.decode('utf8'): + if 0xD800 <= ord(u) <= 0xDB7F: + return True + if 0xDC00 <= ord(u) <= 0xDBFF: + return True + return False + try: u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True, allow_surrogates=allow_surrogates) valid = True except UnicodeDecodeError as e: valid = False - try: - length = rutf8.check_utf8(s, allow_surrogates) - except rutf8.CheckError: + length, flag = rutf8._check_utf8(s, allow_surrogates, 0, len(s)) + if length < 0: assert not valid + assert ~(length) == e.start else: assert valid assert length == len(u) + if flag == rutf8.FLAG_ASCII: + s.decode('ascii') # assert did not raise + elif flag == rutf8.FLAG_HAS_SURROGATES: + assert allow_surrogates + assert _has_surrogates(s) @given(strategies.characters()) def test_next_pos(uni): From pypy.commits at gmail.com Sat Nov 4 18:16:55 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:55 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: finish whacking until the objspace tests pass Message-ID: <59fe3c57.04361c0a.dda17.6b03@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92935:47de95da2bbb Date: 2017-11-04 15:26 +0100 http://bitbucket.org/pypy/pypy/changeset/47de95da2bbb/ Log: finish whacking until the objspace tests pass diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -8,3 +8,4 @@ * better flag handling in split/splitlines maybe? * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode +* encode_error_handler has XXX diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,13 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. 
- def raise_unicode_exception_encode(errors, encoding, msg, w_u, + def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, startingpos, endingpos): + # XXX fix once we stop using runicode.py + flag = _get_flag(u.decode('utf8')) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - w_u, + space.newutf8(u, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,7 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x)) + return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -1,5 +1,7 @@ from py.test import raises +from rpython.rlib import rutf8 + class AppTest_IndexProtocol: def setup_class(self): w_oldstyle = self.space.appexec([], """(): @@ -263,7 +265,8 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.wrap(u"this is a test") + self.w_seq = self.space.newutf8("this is a test", len("this is a test"), + rutf8.FLAG_ASCII) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.module._collections.interp_deque import W_Deque from pypy.module.itertools.interp_itertools import W_Repeat @@ -71,7 +74,8 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.wrap(u'Y' * self.SIZE)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, + rutf8.FLAG_ASCII)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -22,7 +22,7 @@ BytesListStrategy) #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length) + return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1834,7 +1834,7 @@ if not isinstance(w_unistr, W_UnicodeObject): raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) unistr = w_unistr._utf8 - result = ['\0'] * len(unistr) + result = ['\0'] * w_unistr._length digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] i = 0 @@ -1843,6 +1843,8 @@ uchr = rutf8.codepoint_at_pos(unistr, i) if rutf8.isspace(unistr, i): result[res_pos] = ' ' + res_pos += 1 + i = rutf8.next_codepoint_pos(unistr, i) continue try: result[res_pos] = digits[unicodedb.decimal(uchr)] From pypy.commits at gmail.com Sat Nov 4 18:16:57 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:57 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: small fixes, for revisting later once we actually want tests to pass Message-ID: <59fe3c59.4dbbdf0a.8cfd1.b87c@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92936:71debd44669a Date: 2017-11-04 15:31 +0100 http://bitbucket.org/pypy/pypy/changeset/71debd44669a/ Log: small fixes, for revisting later once we actually want tests to pass diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -478,10 +478,10 @@ except rutf8.CheckError as e: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases - xxx res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(res, lgt), + flag = unicodehelper._get_flag(res.decode("utf8")) + return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: return space.newtuple([space.newutf8(string, lgt, flag), @@ -700,7 +700,8 @@ final, state.decode_error_handler, unicode_name_handler) - return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) + flag = unicodehelper._get_flag(result.decode('utf8')) + return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -74,11 +74,12 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): + flag = 13 raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ space.newtext(name), - space.newutf8(input, inputlen), + space.newutf8(input, inputlen, flag), space.newint(e.start), space.newint(e.end), space.newtext(e.reason)])) diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -478,8 +478,8 @@ # I suppose this is a valid utf8, but there is noone to check # and noone to catch an error either try: - lgt = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt) + lgt, flag = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt, flag) except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg From pypy.commits at gmail.com Sat Nov 4 18:16:59 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:59 -0700 (PDT) Subject: [pypy-commit] 
pypy unicode-utf8: first attempt at fixing the unicode surrogate mess Message-ID: <59fe3c5b.4f931c0a.bc56f.0826@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92937:0c93ee971f62 Date: 2017-11-04 19:07 +0100 http://bitbucket.org/pypy/pypy/changeset/0c93ee971f62/ Log: first attempt at fixing the unicode surrogate mess diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -1,4 +1,3 @@ -* unskip tests in test_unicodeobject.py * rutf8.prev_codepoint_pos should use r_uint * find a better way to run "find" without creating the index storage, if one is not already readily available @@ -9,3 +8,4 @@ * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode * encode_error_handler has XXX +* reenable list strategies for ascii-only unicode diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -72,8 +72,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - utf, lgt = unicodehelper.decode_utf8(space, substr) - w_u = space.newutf8(utf, lgt) + utf, (lgt, flag) = unicodehelper.decode_utf8(space, substr) + w_u = space.newutf8(utf, lgt, flag) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -45,14 +45,14 @@ def _has_surrogate(u): for c in u: - if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + if 0xD800 <= ord(c) <= 0xDFFF: return True return False def _get_flag(u): flag = rutf8.FLAG_ASCII for c in u: - if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + if 0xD800 <= ord(c) <= 0xDFFF: return rutf8.FLAG_HAS_SURROGATES if ord(c) >= 0x80: flag = rutf8.FLAG_REGULAR @@ -143,7 +143,7 @@ def str_decode_ascii(s, slen, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, slen, len(s) + return s, slen, len(s), rutf8.FLAG_ASCII except rutf8.CheckError: w = DecodeWrapper((errorhandler)) u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -30,8 +30,8 @@ raise oefmt(space.w_ValueError, "unichr() arg out of range") if code < 0x80: flag = rutf8.FLAG_ASCII - elif 0xDB80 <= code <= 0xCBFF or 0xD800 <= code <= 0xDB7F: - flag = rutf8.FLAG_HAS_SURROGATE + elif 0xD800 <= code <= 0xDFFF: + flag = rutf8.FLAG_HAS_SURROGATES else: flag = rutf8.FLAG_REGULAR return space.newutf8(s, 1, flag) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -516,8 +516,9 @@ return w_obj.listview_unicode() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: return w_obj.listview_unicode() - #if isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj): - # return w_obj.listview_unicode() + if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) + and w_obj.is_ascii()): + return w_obj.listview_unicode() if isinstance(w_obj, W_ListObject) and self._uses_list_iter(w_obj): return w_obj.getitems_unicode() return None diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ 
b/pypy/objspace/std/test/test_unicodeobject.py @@ -27,7 +27,6 @@ assert len(warnings) == 2 def test_listview_unicode(self): - py.test.skip("skip for new") w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) assert self.space.listview_unicode(w_str) == list(u"abcd") @@ -662,7 +661,6 @@ assert unicode('+AB', 'utf-7', 'replace') == u'\ufffd' def test_codecs_utf8(self): - skip("unskip this before merge") assert u''.encode('utf-8') == '' assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' @@ -695,7 +693,6 @@ assert unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' def test_codecs_errors(self): - skip("some nonsense in handling of ignore and replace") # Error handling (encoding) raises(UnicodeError, u'Andr\202 x'.encode, 'ascii') raises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -93,6 +93,8 @@ return space.text_w(space.str(self)) def utf8_w(self, space): + if self._has_surrogates(): + return rutf8.reencode_utf8_with_surrogates(self._utf8) return self._utf8 def readbuf_w(self, space): @@ -115,8 +117,8 @@ charbuf_w = str_w def listview_unicode(self): - XXX # fix at some point - return _create_list_from_unicode(self._value) + assert self.is_ascii() + return _create_list_from_unicode(self._utf8) def ord(self, space): if self._len() != 1: @@ -410,7 +412,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_NORMAL) + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -632,7 +634,7 @@ return rutf8.FLAG_REGULAR def _get_flag(self): - if self._is_ascii(): + if self.is_ascii(): return rutf8.FLAG_ASCII elif self._has_surrogates(): return rutf8.FLAG_HAS_SURROGATES @@ -977,7 +979,7 @@ end = rutf8.next_codepoint_pos(self._utf8, start) return W_UnicodeObject(self._utf8[start:end], 1, self._get_flag()) - def _is_ascii(self): + def is_ascii(self): return self._index_storage is rutf8.UTF8_IS_ASCII def _has_surrogates(self): @@ -986,7 +988,8 @@ self._index_storage.flag == rutf8.FLAG_HAS_SURROGATES)) def _index_to_byte(self, index): - if self._is_ascii(): + if self.is_ascii(): + assert index >= 0 return index return rutf8.codepoint_position_at_index( self._utf8, self._get_index_storage(), index) @@ -1195,7 +1198,7 @@ assert False, "always raises" return space.newbytes(s) if ((encoding is None and space.sys.defaultencoding == 'utf8') or - encoding == 'utf-8'): + encoding == 'utf-8' or encoding == 'utf8'): return space.newbytes(space.utf8_w(w_object)) if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -388,6 +388,34 @@ assert pos - continuation_bytes >= 0 return pos - continuation_bytes, flag +def reencode_utf8_with_surrogates(utf8): + """ Receiving valid UTF8 which contains surrogates, combine surrogate + pairs into correct UTF8 with pairs collpased. 
This is a rare case + and you should not be using surrogate pairs in the first place, + so the performance here is a bit secondary + """ + s = StringBuilder(len(utf8)) + stop = len(utf8) + i = 0 + while i < stop: + uchr = codepoint_at_pos(utf8, i) + if 0xD800 <= uchr <= 0xDBFF: + high = uchr + i = next_codepoint_pos(utf8, i) + if i >= stop: + unichr_as_utf8_append(s, uchr, True) + break + low = codepoint_at_pos(utf8, i) + if 0xDC00 <= low <= 0xDFFF: + uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00) + i = next_codepoint_pos(utf8, i) + # else not really a surrogate pair, just append high + else: + i = next_codepoint_pos(utf8, i) + unichr_as_utf8_append(s, uchr, True) + return s.build() + + @jit.elidable def codepoints_in_utf8(value, start=0, end=sys.maxint): """Return the number of codepoints in the UTF-8 byte string diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -40,9 +40,7 @@ def _test_check_utf8(s, allow_surrogates): def _has_surrogates(s): for u in s.decode('utf8'): - if 0xD800 <= ord(u) <= 0xDB7F: - return True - if 0xDC00 <= ord(u) <= 0xDBFF: + if 0xD800 <= ord(u) <= 0xDFFF: return True return False From pypy.commits at gmail.com Sat Nov 4 18:16:53 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:53 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: progress on having flags correctly propagated, almost there Message-ID: <59fe3c55.5d87df0a.896e7.cb97@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92934:29ce3a4ea76f Date: 2017-11-04 14:38 +0100 http://bitbucket.org/pypy/pypy/changeset/29ce3a4ea76f/ Log: progress on having flags correctly propagated, almost there diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -4,3 +4,7 @@ if one is not already readily available * fix _pypyjson * fix cpyext +* write the correct jit_elidable in _get_index_storage +* better flag handling in split/splitlines maybe? 
+* find all the fast-paths that we want to do with utf8 (we only do + utf-8 now, not UTF8 or utf8) for decode/encode diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1764,8 +1764,10 @@ return self.realutf8_w(w_obj).decode('utf8') def newunicode(self, u): + from pypy.interpreter import unicodehelper assert isinstance(u, unicode) - return self.newutf8(u.encode("utf8"), len(u)) + # XXX let's disallow that + return self.newutf8(u.encode("utf8"), len(u), unicodehelper._get_flag(u)) def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -59,10 +59,11 @@ else: substr = decode_unicode_utf8(space, s, ps, q) if rawmode: - v, length = unicodehelper.decode_raw_unicode_escape(space, substr) + r = unicodehelper.decode_raw_unicode_escape(space, substr) else: - v, length = unicodehelper.decode_unicode_escape(space, substr) - return space.newutf8(v, length) + r = unicodehelper.decode_unicode_escape(space, substr) + v, length, flag = r + return space.newutf8(v, length, flag) need_encoding = (encoding is not None and encoding != "utf-8" and encoding != "utf8" and diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,11 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, + def raise_unicode_exception_encode(errors, encoding, msg, w_u, startingpos, endingpos): raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(u, u_len), + w_u, space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -41,6 +41,21 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, encoding, errors) +def _has_surrogate(u): + for c in u: + if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + return True + return False + +def _get_flag(u): + flag = rutf8.FLAG_ASCII + for c in u: + if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + return rutf8.FLAG_HAS_SURROGATES + if ord(c) >= 0x80: + flag = rutf8.FLAG_REGULAR + return flag + # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) @@ -52,7 +67,14 @@ final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, unicodedata_handler=unicodedata_handler) # XXX argh. we want each surrogate to be encoded separately - return ''.join([u.encode('utf8') for u in result_u]), len(result_u) + utf8 = ''.join([u.encode('utf8') for u in result_u]) + if rutf8.first_non_ascii_char(utf8) == -1: + flag = rutf8.FLAG_ASCII + elif _has_surrogate(result_u): + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + return utf8, len(result_u), flag def decode_raw_unicode_escape(space, string): # XXX pick better length, maybe @@ -61,7 +83,14 @@ string, len(string), "strict", final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle) # XXX argh. 
we want each surrogate to be encoded separately - return ''.join([u.encode('utf8') for u in result_u]), len(result_u) + utf8 = ''.join([u.encode('utf8') for u in result_u]) + if rutf8.first_non_ascii_char(utf8) == -1: + flag = rutf8.FLAG_ASCII + elif _has_surrogate(result_u): + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + return utf8, len(result_u), flag def check_ascii_or_raise(space, string): try: @@ -78,12 +107,12 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. try: - length = rutf8.check_utf8(string, allow_surrogates=True) + length, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, e.pos, e.pos + 1) assert False, "unreachable" - return length + return length, flag def encode_utf8(space, uni): # DEPRECATED @@ -116,7 +145,7 @@ except rutf8.CheckError: w = DecodeWrapper((errorhandler)) u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) # XXX wrappers, think about speed @@ -139,14 +168,14 @@ w = DecodeWrapper(errorhandler) u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): w = DecodeWrapper(errorhandler) u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, w.handle, ud_handler) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) def setup_new_encoders(encoding): encoder_name = 'utf8_encode_' + encoding @@ -160,7 +189,7 @@ def decoder(s, slen, errors, final, errorhandler): w = DecodeWrapper((errorhandler)) u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) encoder.__name__ = encoder_name decoder.__name__ = decoder_name if encoder_name not in globals(): diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -28,7 +28,13 @@ s = rutf8.unichr_as_utf8(code, allow_surrogates=True) except ValueError: raise oefmt(space.w_ValueError, "unichr() arg out of range") - return space.newutf8(s, 1) + if code < 0x80: + flag = rutf8.FLAG_ASCII + elif 0xDB80 <= code <= 0xCBFF or 0xD800 <= code <= 0xDB7F: + flag = rutf8.FLAG_HAS_SURROGATE + else: + flag = rutf8.FLAG_REGULAR + return space.newutf8(s, 1, flag) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." 
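The same three-argument pattern repeats through the rest of this commit; as a
minimal sketch (not itself part of the diff, with 'space' standing in for any
object space and 's' for a byte string):

    from rpython.rlib import rutf8

    def wrap_checked_utf8(space, s):
        # validate once, then hand newutf8() the explicit length and flag
        lgt, flag = rutf8.check_utf8(s, allow_surrogates=True)
        return space.newutf8(s, lgt, flag)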
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -39,8 +39,8 @@ w_input = space.newbytes(input) else: w_cls = space.w_UnicodeEncodeError - length = rutf8.check_utf8(input, allow_surrogates=True) - w_input = space.newutf8(input, length) + length, flag = rutf8.check_utf8(input, allow_surrogates=True) + w_input = space.newutf8(input, length, flag) w_exc = space.call_function( w_cls, space.newtext(encoding), @@ -189,7 +189,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.newtext('end')) - return space.newtuple([space.newutf8('', 0), w_end]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), w_end]) REPLACEMENT = u'\ufffd'.encode('utf8') @@ -200,13 +200,13 @@ size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): text = '?' * size - return space.newtuple([space.newutf8(text, size), w_end]) + return space.newtuple([space.newutf8(text, size, rutf8.FLAG_ASCII), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = REPLACEMENT - return space.newtuple([space.newutf8(text, 1), w_end]) + return space.newtuple([space.newutf8(text, 1, rutf8.FLAG_REGULAR), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): text = REPLACEMENT * size - return space.newtuple([space.newutf8(text, size), w_end]) + return space.newtuple([space.newutf8(text, size, rutf8.FLAG_REGULAR), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -403,9 +403,9 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = getattr(unicodehelper, rname) - result, consumed, length = func(string, len(string), errors, - final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, length), + result, consumed, length, flag = func(string, len(string), errors, + final, state.decode_error_handler) + return space.newtuple([space.newutf8(result, length, flag), space.newint(consumed)]) wrap_decoder.func_name = rname globals()[name] = wrap_decoder @@ -448,7 +448,7 @@ # "allow_surrogates=True" @unwrap_spec(utf8='utf8', errors='text_or_none') def utf_8_encode(space, utf8, errors="strict"): - length = rutf8.check_utf8(utf8, allow_surrogates=True) + length, _ = rutf8.check_utf8(utf8, allow_surrogates=True) return space.newtuple([space.newbytes(utf8), space.newint(length)]) #@unwrap_spec(uni=unicode, errors='text_or_none') #def utf_8_encode(space, uni, errors="strict"): @@ -474,16 +474,17 @@ state = space.fromcache(CodecState) # call the fast version for checking try: - lgt = rutf8.check_utf8(string, allow_surrogates=True) + lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases + xxx res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt), space.newint(consumed)]) else: - return space.newtuple([space.newutf8(string, lgt), + return space.newtuple([space.newutf8(string, lgt, flag), space.newint(len(string))]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -403,8 
+403,8 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): arg = u.get_str() - length = unicodehelper.check_utf8_or_raise(space, arg) - return space.newutf8(arg, length) + length, flag = unicodehelper.check_utf8_or_raise(space, arg) + return space.newutf8(arg, length, flag) @marshaller(W_SetObject) def marshal_set(space, w_set, m): diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -317,8 +317,8 @@ for utf in lst: assert utf is not None assert isinstance(utf, str) - length = rutf8.check_utf8(utf, allow_surrogates=True) - res_w.append(self.newutf8(utf, length)) + length, flag = rutf8.check_utf8(utf, allow_surrogates=True) + res_w.append(self.newutf8(utf, length, flag)) return self.newlist(res_w) def newlist_int(self, list_i): @@ -369,10 +369,10 @@ return self.w_None return self.newtext(s) - def newutf8(self, utf8s, length): + def newutf8(self, utf8s, length, flag): assert utf8s is not None assert isinstance(utf8s, str) - return W_UnicodeObject(utf8s, length) + return W_UnicodeObject(utf8s, length, flag) def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -3,6 +3,7 @@ import py import sys from hypothesis import given, strategies, settings, example +from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError @@ -27,12 +28,12 @@ def test_listview_unicode(self): py.test.skip("skip for new") - w_str = self.space.wrap(u'abcd') + w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) assert self.space.listview_unicode(w_str) == list(u"abcd") def test_new_shortcut(self): space = self.space - w_uni = self.space.wrap(u'abcd') + w_uni = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) w_new = space.call_method( space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni @@ -44,8 +45,8 @@ return # skip this case v = u[start : start + len1] space = self.space - w_u = space.wrap(u) - w_v = space.wrap(v) + w_u = space.newutf8(u.encode('utf8'), len(u), rutf8.FLAG_REGULAR) + w_v = space.newutf8(v.encode('utf8'), len(v), rutf8.FLAG_REGULAR) expected = u.find(v, start, start + len1) try: w_index = space.call_method(w_u, 'index', w_v, diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -36,14 +36,24 @@ _immutable_fields_ = ['_utf8'] @enforceargs(utf8str=str) - def __init__(self, utf8str, length): + def __init__(self, utf8str, length, flag): assert isinstance(utf8str, str) assert length >= 0 self._utf8 = utf8str self._length = length - self._index_storage = rutf8.null_storage() - #if not we_are_translated(): - # assert rutf8.check_utf8(utf8str, allow_surrogates=True) == length + if flag == rutf8.FLAG_ASCII: + self._index_storage = rutf8.UTF8_IS_ASCII + elif flag == rutf8.FLAG_HAS_SURROGATES: + self._index_storage = rutf8.UTF8_HAS_SURROGATES + else: + assert flag == rutf8.FLAG_REGULAR + self._index_storage = rutf8.null_storage() + # the storage can be one of: + # - null, unicode with no surrogates + # - rutf8.UTF8_HAS_SURROGATES + # - rutf8.UTF8_IS_ASCII + # - malloced object, which means it has index, then + # _index_storage.flags determines the kind def __repr__(self): """representation for debugging 
purposes""" @@ -222,7 +232,11 @@ assert isinstance(w_value, W_UnicodeObject) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) - W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length) + W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length, + w_value._get_flag()) + if w_value._index_storage: + # copy the storage if it's there + w_newobj._index_storage = w_value._index_storage return w_newobj def descr_repr(self, space): @@ -326,29 +340,33 @@ def descr_swapcase(self, space): selfvalue = self._utf8 builder = StringBuilder(len(selfvalue)) + flag = self._get_flag() i = 0 while i < len(selfvalue): ch = rutf8.codepoint_at_pos(selfvalue, i) i = rutf8.next_codepoint_pos(selfvalue, i) if unicodedb.isupper(ch): - rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(ch)) + ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(ch)) - else: - rutf8.unichr_as_utf8_append(builder, ch) - return W_UnicodeObject(builder.build(), self._length) + ch = unicodedb.toupper(ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + rutf8.unichr_as_utf8_append(builder, ch) + return W_UnicodeObject(builder.build(), self._length, flag) def descr_title(self, space): if len(self._utf8) == 0: return self - return W_UnicodeObject(self.title(self._utf8), self._len()) + utf8, flag = self.title_unicode(self._utf8) + return W_UnicodeObject(utf8, self._len(), flag) @jit.elidable - def title(self, value): + def title_unicode(self, value): input = self._utf8 builder = StringBuilder(len(input)) i = 0 previous_is_cased = False + flag = self._get_flag() while i < len(input): ch = rutf8.codepoint_at_pos(input, i) i = rutf8.next_codepoint_pos(input, i) @@ -356,14 +374,17 @@ ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build() + return builder.build(), flag def descr_translate(self, space, w_table): input = self._utf8 result = StringBuilder(len(input)) result_length = 0 + flag = self._get_flag() i = 0 while i < len(input): codepoint = rutf8.codepoint_at_pos(input, i) @@ -380,6 +401,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) + flag = self._combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -387,13 +409,15 @@ "character mapping must return integer, None " "or unicode") try: + if codepoint >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_NORMAL) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length) + return W_UnicodeObject(result.build(), result_length, flag) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -472,7 +496,7 @@ newlen += dist oldtoken = token - return W_UnicodeObject(expanded, newlen) + return W_UnicodeObject(expanded, newlen, self._get_flag()) _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): @@ -506,11 +530,14 @@ def descr_lower(self, space): builder = StringBuilder(len(self._utf8)) pos = 0 + flag = self._get_flag() while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) + 
if lower >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len()) + return W_UnicodeObject(builder.build(), self._len(), flag) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -595,6 +622,22 @@ return True return endswith(value, prefix, start, end) + @staticmethod + def _combine_flags(self_flag, other_flag): + if self_flag == rutf8.FLAG_ASCII and other_flag == rutf8.FLAG_ASCII: + return rutf8.FLAG_ASCII + elif (self_flag == rutf8.FLAG_HAS_SURROGATES or + other_flag == rutf8.FLAG_HAS_SURROGATES): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + + def _get_flag(self): + if self._is_ascii(): + return rutf8.FLAG_ASCII + elif self._has_surrogates(): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + def descr_add(self, space, w_other): try: w_other = self.convert_arg_to_w_unicode(space, w_other) @@ -602,8 +645,9 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise + flag = self._combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, - self._len() + w_other._len()) + self._len() + w_other._len(), flag) @jit.look_inside_iff(lambda self, space, list_w, size: jit.loop_unrolling_heuristic(list_w, size)) @@ -613,6 +657,7 @@ prealloc_size = len(value) * (size - 1) unwrapped = newlist_hint(size) + flag = self._get_flag() for i in range(size): w_s = list_w[i] check_item = self._join_check_item(space, w_s) @@ -625,6 +670,7 @@ # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) + flag = self._combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -634,7 +680,7 @@ if value and i != 0: sb.append(value) sb.append(unwrapped[i]) - return W_UnicodeObject(sb.build(), lgt) + return W_UnicodeObject(sb.build(), lgt, flag) @unwrap_spec(keepends=bool) def descr_splitlines(self, space, keepends=False): @@ -663,28 +709,33 @@ lgt += line_end_chars assert eol >= 0 assert sol >= 0 - strs_w.append(W_UnicodeObject(value[sol:eol], lgt)) + # XXX we can do better with flags here, if we want to + strs_w.append(W_UnicodeObject(value[sol:eol], lgt, self._get_flag())) return space.newlist(strs_w) def descr_upper(self, space): value = self._utf8 builder = StringBuilder(len(value)) + flag = self._get_flag() i = 0 while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) + uchar = unicodedb.toupper(uchar) + if uchar >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar)) - return W_UnicodeObject(builder.build(), self._length) + rutf8.unichr_as_utf8_append(builder, uchar) + return W_UnicodeObject(builder.build(), self._length, flag) @unwrap_spec(width=int) def descr_zfill(self, space, width): selfval = self._utf8 if len(selfval) == 0: - return W_UnicodeObject('0' * width, width) + return W_UnicodeObject('0' * width, width, rutf8.FLAG_ASCII) num_zeros = width - self._len() if num_zeros <= 0: # cannot return self, in case it is a subclass of str - return W_UnicodeObject(selfval, self._len()) + return W_UnicodeObject(selfval, self._len(), self._get_flag()) builder = StringBuilder(num_zeros + len(selfval)) if len(selfval) > 0 and 
(selfval[0] == '+' or selfval[0] == '-'): # copy sign to first position @@ -694,7 +745,7 @@ start = 0 builder.append_multiple_char('0', num_zeros) builder.append_slice(selfval, start, len(selfval)) - return W_UnicodeObject(builder.build(), width) + return W_UnicodeObject(builder.build(), width, self._get_flag()) @unwrap_spec(maxsplit=int) def descr_split(self, space, w_sep=None, maxsplit=-1): @@ -753,7 +804,7 @@ break i += 1 byte_pos = self._index_to_byte(start + i * step) - return W_UnicodeObject(builder.build(), sl) + return W_UnicodeObject(builder.build(), sl, self._get_flag()) def descr_getslice(self, space, w_start, w_stop): start, stop = normalize_simple_slice( @@ -770,22 +821,30 @@ assert stop >= 0 byte_start = self._index_to_byte(start) byte_stop = self._index_to_byte(stop) - return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start) + return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start, + self._get_flag()) def descr_capitalize(self, space): value = self._utf8 if len(value) == 0: return self._empty() + flag = self._get_flag() builder = StringBuilder(len(value)) uchar = rutf8.codepoint_at_pos(value, 0) i = rutf8.next_codepoint_pos(value, 0) - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar)) + ch = unicodedb.toupper(uchar) + rutf8.unichr_as_utf8_append(builder, ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(uchar)) - return W_UnicodeObject(builder.build(), self._len()) + ch = unicodedb.tolower(uchar) + rutf8.unichr_as_utf8_append(builder, ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): @@ -804,7 +863,7 @@ centered = value d = 0 - return W_UnicodeObject(centered, self._len() + d) + return W_UnicodeObject(centered, self._len() + d, self._get_flag()) def descr_count(self, space, w_sub, w_start=None, w_end=None): value = self._utf8 @@ -830,11 +889,11 @@ if pos < 0: return space.newtuple([self, self._empty(), self._empty()]) else: - lgt = rutf8.check_utf8(value, True, stop=pos) + lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt), w_sub, + [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen)]) + self._len() - lgt - sublen, self._get_flag())]) def descr_rpartition(self, space, w_sub): value = self._utf8 @@ -848,11 +907,11 @@ if pos < 0: return space.newtuple([self._empty(), self._empty(), self]) else: - lgt = rutf8.check_utf8(value, True, stop=pos) + lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt), w_sub, + [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen)]) + self._len() - lgt - sublen, self._get_flag())]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): @@ -870,8 +929,9 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + flag = self._combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) - return W_UnicodeObject(res, 
newlength) + return W_UnicodeObject(res, newlength, flag) def descr_mul(self, space, w_times): try: @@ -883,16 +943,29 @@ if times <= 0: return self._empty() if len(self._utf8) == 1: - return W_UnicodeObject(self._utf8[0] * times, times) - return W_UnicodeObject(self._utf8 * times, times * self._len()) + return W_UnicodeObject(self._utf8[0] * times, times, + self._get_flag()) + return W_UnicodeObject(self._utf8 * times, times * self._len(), + self._get_flag()) descr_rmul = descr_mul def _get_index_storage(self): - storage = jit.conditional_call_elidable(self._index_storage, - rutf8.create_utf8_index_storage, self._utf8, self._length) + # XXX write the correct jit.elidable + condition = (self._index_storage == rutf8.null_storage() or + not bool(self._index_storage.contents)) + if condition: + storage = rutf8.create_utf8_index_storage(self._utf8, self._length) + else: + storage = self._index_storage if not jit.isconstant(self): + prev_storage = self._index_storage self._index_storage = storage + if prev_storage == rutf8.UTF8_HAS_SURROGATES: + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + self._index_storage.flag = flag return storage def _getitem_result(self, space, index): @@ -902,9 +975,19 @@ raise oefmt(space.w_IndexError, "string index out of range") start = self._index_to_byte(index) end = rutf8.next_codepoint_pos(self._utf8, start) - return W_UnicodeObject(self._utf8[start:end], 1) + return W_UnicodeObject(self._utf8[start:end], 1, self._get_flag()) + + def _is_ascii(self): + return self._index_storage is rutf8.UTF8_IS_ASCII + + def _has_surrogates(self): + return (self._index_storage is rutf8.UTF8_HAS_SURROGATES or + (bool(self._index_storage) and + self._index_storage.flag == rutf8.FLAG_HAS_SURROGATES)) def _index_to_byte(self, index): + if self._is_ascii(): + return index return rutf8.codepoint_position_at_index( self._utf8, self._get_index_storage(), index) @@ -967,6 +1050,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") + flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -974,9 +1058,9 @@ value = d * w_fillchar._utf8[0] + value else: value = d * w_fillchar._utf8 + value - return W_UnicodeObject(value, width) + return W_UnicodeObject(value, width, flag) - return W_UnicodeObject(value, lgt) + return W_UnicodeObject(value, lgt, flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_ljust(self, space, width, w_fillchar): @@ -985,6 +1069,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") + flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: @@ -992,9 +1077,9 @@ value = value + d * w_fillchar._utf8[0] else: value = value + d * w_fillchar._utf8 - return W_UnicodeObject(value, width) + return W_UnicodeObject(value, width, flag) - return W_UnicodeObject(value, self._len()) + return W_UnicodeObject(value, self._len(), flag) def _utf8_sliced(self, start, stop, lgt): assert start >= 0 @@ -1002,7 +1087,7 @@ #if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), # space.w_bytes): # return orig_obj - return W_UnicodeObject(self._utf8[start:stop], lgt) + return W_UnicodeObject(self._utf8[start:stop], lgt, self._get_flag()) def _strip_none(self, space, left, right): "internal function called by str_xstrip methods" @@ -1050,7 +1135,7 @@ return 
self._utf8_sliced(lpos, rpos, lgt) def descr_getnewargs(self, space): - return space.newtuple([W_UnicodeObject(self._utf8, self._length)]) + return space.newtuple([W_UnicodeObject(self._utf8, self._length, self._get_flag())]) @@ -1135,11 +1220,11 @@ if encoding == 'ascii': s = space.charbuf_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) - return space.newutf8(s, len(s)) + return space.newutf8(s, len(s), rutf8.FLAG_ASCII) if encoding == 'utf-8': s = space.charbuf_w(w_obj) - lgt = unicodehelper.check_utf8_or_raise(space, s) - return space.newutf8(s, lgt) + lgt, flag = unicodehelper.check_utf8_or_raise(space, s) + return space.newutf8(s, lgt, flag) w_codecs = space.getbuiltinmodule("_codecs") w_decode = space.getattr(w_codecs, space.newtext("decode")) if errors is None: @@ -1194,7 +1279,7 @@ return unicode_from_encoded_object(space, w_bytes, encoding, "strict") s = space.bytes_w(w_bytes) unicodehelper.check_ascii_or_raise(space, s) - return W_UnicodeObject(s, len(s)) + return W_UnicodeObject(s, len(s), rutf8.FLAG_ASCII) class UnicodeDocstrings: @@ -1741,7 +1826,7 @@ return [s for s in value] -W_UnicodeObject.EMPTY = W_UnicodeObject('', 0) +W_UnicodeObject.EMPTY = W_UnicodeObject('', 0, rutf8.FLAG_ASCII) # Helper for converting int/long diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -305,14 +305,14 @@ def check_utf8(s, allow_surrogates, start=0, stop=-1): """Check that 's' is a utf-8-encoded byte string. - Returns the length (number of chars) and flags or raise CheckError. + Returns the length (number of chars) and flag or raise CheckError. If allow_surrogates is False, then also raise if we see any. Note also codepoints_in_utf8(), which also computes the length faster by assuming that 's' is valid utf-8. 
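    (Note added in editing, not part of the original docstring: the index
    storage hunks further down in this diff restructure UTF8_INDEX_STORAGE
    into a GcStruct holding a 'flag' plus a 'contents' array with one entry
    per block of 64 codepoints; each entry keeps a byte position anchoring
    the block ('baseindex') and sixteen one-byte offsets, one for every 4th
    codepoint.  A lookup then picks storage.contents[index >> 6], adds the
    cached offset for the (index >> 2) & 0x0F slot, and walks at most a few
    codepoints with prev_codepoint_pos()/next_codepoint_pos(), so codepoint
    indexing stays close to constant time without re-decoding the string.)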
""" - res, flags = _check_utf8(s, allow_surrogates, start, stop) + res, flag = _check_utf8(s, allow_surrogates, start, stop) if res >= 0: - return res, flags + return res, flag raise CheckError(~res) @jit.elidable @@ -416,12 +416,13 @@ return False -UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct( - 'utf8_loc', +UTF8_INDEX_STORAGE = lltype.GcStruct('utf8_loc', + ('flag', lltype.Signed), + ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct( + 'utf8_loc_elem', ('baseindex', lltype.Signed), - ('flag', lltype.Signed), - ('ofs', lltype.FixedSizeArray(lltype.Char, 16)) - )) + ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) + )))) FLAG_REGULAR = 0 FLAG_HAS_SURROGATES = 1 @@ -429,43 +430,47 @@ # note that we never need index storage if we're pure ascii, but it's useful # for passing into W_UnicodeObject.__init__ -ASCII_INDEX_STORAGE_BLOCKS = 5 -ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, - ASCII_INDEX_STORAGE_BLOCKS, - immortal=True) -for _i in range(ASCII_INDEX_STORAGE_BLOCKS): - ASCII_INDEX_STORAGE[_i].baseindex = _i * 64 - for _j in range(16): - ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1) +#ASCII_INDEX_STORAGE_BLOCKS = 5 +#ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, +# ASCII_INDEX_STORAGE_BLOCKS, +# immortal=True) +#for _i in range(ASCII_INDEX_STORAGE_BLOCKS): +# ASCII_INDEX_STORAGE[_i].baseindex = _i * 64 +# for _j in range(16): +# ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1) def null_storage(): return lltype.nullptr(UTF8_INDEX_STORAGE) -UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) -UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) +UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) +UTF8_IS_ASCII.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) +UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) +UTF8_HAS_SURROGATES.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) def create_utf8_index_storage(utf8, utf8len): """ Create an index storage which stores index of each 4th character in utf8 encoded unicode string. """ - if len(utf8) == utf8len < ASCII_INDEX_STORAGE_BLOCKS * 64: - return ASCII_INDEX_STORAGE +# if len(utf8) == utf8len < ASCII_INDEX_STORAGE_BLOCKS * 64: +# return ASCII_INDEX_STORAGE arraysize = utf8len // 64 + 1 - storage = lltype.malloc(UTF8_INDEX_STORAGE, arraysize) + storage = lltype.malloc(UTF8_INDEX_STORAGE) + contents = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, arraysize) + storage.contents = contents baseindex = 0 current = 0 while True: - storage[current].baseindex = baseindex + contents[current].baseindex = baseindex next = baseindex for i in range(16): if utf8len == 0: next += 1 # assume there is an extra '\x00' character else: next = next_codepoint_pos(utf8, next) - storage[current].ofs[i] = chr(next - baseindex) + contents[current].ofs[i] = chr(next - baseindex) utf8len -= 4 if utf8len < 0: - assert current + 1 == len(storage) + assert current + 1 == len(contents) break next = next_codepoint_pos(utf8, next) next = next_codepoint_pos(utf8, next) @@ -485,8 +490,8 @@ this function. 
""" current = index >> 6 - ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage[current].baseindex + ofs + ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage.contents[current].baseindex + ofs index &= 0x3 if index == 0: return prev_codepoint_pos(utf8, bytepos) @@ -504,8 +509,8 @@ storage of type UTF8_INDEX_STORAGE """ current = index >> 6 - ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage[current].baseindex + ofs + ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage.contents[current].baseindex + ofs index &= 0x3 if index == 0: return codepoint_before_pos(utf8, bytepos) From pypy.commits at gmail.com Sat Nov 4 18:17:03 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:03 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: whack enough to get somewhere with the list strategy - just for ascii-unicode so far Message-ID: <59fe3c5f.3bb0df0a.1515b.2ae9@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92939:0aeb46cc86b0 Date: 2017-11-04 19:37 +0100 http://bitbucket.org/pypy/pypy/changeset/0aeb46cc86b0/ Log: whack enough to get somewhere with the list strategy - just for ascii-unicode so far diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1054,7 +1054,7 @@ """ return None - def listview_unicode(self, w_list): + def listview_utf8(self, w_list): """ Return a list of unwrapped unicode out of a list of unicode. If the argument is not a list or does not contain only unicode, return None. May return None anyway. diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -10,7 +10,7 @@ import operator import sys -from rpython.rlib import debug, jit, rerased +from rpython.rlib import debug, jit, rerased, rutf8 from rpython.rlib.listsort import make_timsort_class from rpython.rlib.objectmodel import ( import_from_mixin, instantiate, newlist_hint, resizelist_hint, specialize) @@ -95,10 +95,11 @@ else: return space.fromcache(BytesListStrategy) - elif False and type(w_firstobj) is W_UnicodeObject: # disable unicode list strat + elif type(w_firstobj) is W_UnicodeObject and w_firstobj.is_ascii(): # check for all-unicodes for i in range(1, len(list_w)): - if type(list_w[i]) is not W_UnicodeObject: + item = list_w[i] + if type(item) is not W_UnicodeObject or not item.is_ascii(): break else: return space.fromcache(UnicodeListStrategy) @@ -196,7 +197,6 @@ @staticmethod def newlist_unicode(space, list_u): - xxxx strategy = space.fromcache(UnicodeListStrategy) storage = strategy.erase(list_u) return W_ListObject.from_storage_and_strategy(space, storage, strategy) @@ -349,10 +349,10 @@ not use the list strategy, return None.""" return self.strategy.getitems_bytes(self) - def getitems_unicode(self): + def getitems_utf8(self): """Return the items in the list as unwrapped unicodes. If the list does not use the list strategy, return None.""" - return self.strategy.getitems_unicode(self) + return self.strategy.getitems_utf8(self) def getitems_int(self): """Return the items in the list as unwrapped ints. 
If the list does not @@ -813,7 +813,7 @@ def getitems_bytes(self, w_list): return None - def getitems_unicode(self, w_list): + def getitems_utf8(self, w_list): return None def getitems_int(self, w_list): @@ -954,8 +954,8 @@ strategy = self.space.fromcache(IntegerListStrategy) elif type(w_item) is W_BytesObject: strategy = self.space.fromcache(BytesListStrategy) - #elif type(w_item) is W_UnicodeObject: - # strategy = self.space.fromcache(UnicodeListStrategy) + elif type(w_item) is W_UnicodeObject and w_item.is_ascii(): + strategy = self.space.fromcache(UnicodeListStrategy) elif type(w_item) is W_FloatObject: strategy = self.space.fromcache(FloatListStrategy) else: @@ -1025,9 +1025,8 @@ w_list.lstorage = strategy.erase(byteslist[:]) return - if False: - unilist = space.listview_unicode(w_iterable) - if unilist is not None: + unilist = space.listview_utf8(w_iterable) + if unilist is not None: w_list.strategy = strategy = space.fromcache(UnicodeListStrategy) # need to copy because intlist can share with w_iterable w_list.lstorage = strategy.erase(unilist[:]) @@ -1995,11 +1994,11 @@ class UnicodeListStrategy(ListStrategy): import_from_mixin(AbstractUnwrappedStrategy) - _none_value = u"" + _none_value = "" def wrap(self, stringval): assert stringval is not None - return self.space.newunicode(stringval) + return self.space.newutf8(stringval, len(stringval), rutf8.FLAG_ASCII) def unwrap(self, w_string): return self.space.utf8_w(w_string) @@ -2009,7 +2008,7 @@ unerase = staticmethod(unerase) def is_correct_type(self, w_obj): - return type(w_obj) is W_UnicodeObject + return type(w_obj) is W_UnicodeObject and w_obj.is_ascii() def list_is_correct_type(self, w_list): return w_list.strategy is self.space.fromcache(UnicodeListStrategy) @@ -2021,7 +2020,7 @@ if reverse: l.reverse() - def getitems_unicode(self, w_list): + def getitems_utf8(self, w_list): return self.unerase(w_list.lstorage) # _______________________________________________________ diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,9 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) + from pypy.interpreter import unicodehelper + return self.newutf8(x.encode('utf8'), len(x), + unicodehelper._get_flag(x)) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): @@ -507,20 +509,20 @@ return w_obj.getitems_bytes() return None - def listview_unicode(self, w_obj): + def listview_utf8(self, w_obj): # note: uses exact type checking for objects with strategies, # and isinstance() for others. See test_listobject.test_uses_custom... 
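        # Note added in editing (not part of the committed patch): the unicode
        # strategies on this branch keep the *unwrapped* utf-8 bytes and wrap
        # them back with newutf8(s, len(s), rutf8.FLAG_ASCII), i.e. they assume
        # one byte per code point.  That assumption only holds for ASCII-only
        # strings, which is why these fast paths and the strategies'
        # is_correct_type() checks are guarded with is_ascii().  A rough
        # illustration of the invariant being relied on:
        #     u"abc".encode("utf-8")    == 'abc'         # 3 bytes, 3 code points
        #     u"ab\xe9".encode("utf-8") == 'ab\xc3\xa9'  # 4 bytes, 3 code points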
if type(w_obj) is W_ListObject: - return w_obj.getitems_unicode() + return w_obj.getitems_utf8() if type(w_obj) is W_DictObject: return w_obj.listview_unicode() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: return w_obj.listview_unicode() if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) and w_obj.is_ascii()): - return w_obj.listview_unicode() + return w_obj.listview_utf8() if isinstance(w_obj, W_ListObject) and self._uses_list_iter(w_obj): - return w_obj.getitems_unicode() + return w_obj.getitems_utf8() return None def listview_int(self, w_obj): diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1591,7 +1591,7 @@ w_set.sstorage = strategy.get_storage_from_unwrapped_list(byteslist) return - unicodelist = space.listview_unicode(w_iterable) + unicodelist = space.listview_utf8(w_iterable) if unicodelist is not None: strategy = space.fromcache(UnicodeSetStrategy) w_set.strategy = strategy diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -20,9 +20,9 @@ IntegerListStrategy) assert isinstance(W_ListObject(space, [wb('a'), wb('b')]).strategy, BytesListStrategy) - #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, - # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, + UnicodeListStrategy) + assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): @@ -47,7 +47,7 @@ l = W_ListObject(space, []) assert isinstance(l.strategy, EmptyListStrategy) l.append(w(u'a')) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l = W_ListObject(space, []) assert isinstance(l.strategy, EmptyListStrategy) @@ -74,7 +74,6 @@ assert isinstance(l.strategy, ObjectListStrategy) def test_unicode_to_any(self): - py.test.skip("disabled") space = self.space l = W_ListObject(space, [space.wrap(u'a'), space.wrap(u'b'), space.wrap(u'c')]) assert isinstance(l.strategy, UnicodeListStrategy) @@ -118,7 +117,7 @@ # UnicodeStrategy to ObjectStrategy l = W_ListObject(space, [w(u'a'),w(u'b'),w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.setitem(0, w(2)) assert isinstance(l.strategy, ObjectListStrategy) @@ -146,7 +145,7 @@ # UnicodeStrategy l = W_ListObject(space, [w(u'a'),w(u'b'),w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.insert(3, w(2)) assert isinstance(l.strategy, ObjectListStrategy) @@ -226,7 +225,7 @@ # UnicodeStrategy to ObjectStrategy l = W_ListObject(space, [w(u'a'), w(u'b'), w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.setslice(0, 1, 2, W_ListObject(space, [w(1), w(2), w(3)])) assert isinstance(l.strategy, ObjectListStrategy) @@ -276,7 +275,7 @@ l = W_ListObject(space, wrapitems([u"a",u"b",u"c",u"d",u"e"])) other = W_ListObject(space, wrapitems([u"a", u"b", u"c"])) keep_other_strategy(l, 0, 2, other.length(), other) - #assert l.strategy is space.fromcache(UnicodeListStrategy) + assert l.strategy is 
space.fromcache(UnicodeListStrategy) l = W_ListObject(space, wrapitems([1.1, 2.2, 3.3, 4.4, 5.5])) other = W_ListObject(space, []) @@ -346,7 +345,7 @@ empty = W_ListObject(space, []) assert isinstance(empty.strategy, EmptyListStrategy) empty.extend(W_ListObject(space, [w(u"a"), w(u"b"), w(u"c")])) - #assert isinstance(empty.strategy, UnicodeListStrategy) + assert isinstance(empty.strategy, UnicodeListStrategy) empty = W_ListObject(space, []) assert isinstance(empty.strategy, EmptyListStrategy) @@ -602,7 +601,7 @@ l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) l2 = W_ListObject(self.space, [self.space.newunicode(u"eins"), self.space.newunicode(u"zwei")]) - #assert isinstance(l2.strategy, UnicodeListStrategy) + assert isinstance(l2.strategy, UnicodeListStrategy) l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newunicode(u"zwei")]) assert isinstance(l3.strategy, ObjectListStrategy) @@ -613,11 +612,10 @@ assert space.listview_bytes(w_l) == ["a", "b"] def test_listview_unicode(self): - py.test.skip("disabled") space = self.space - assert space.listview_unicode(space.wrap(1)) == None + assert space.listview_utf8(space.wrap(1)) == None w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) - assert space.listview_unicode(w_l) == [u"a", u"b"] + assert space.listview_utf8(w_l) == ["a", "b"] def test_string_join_uses_listview_bytes(self): space = self.space @@ -626,7 +624,6 @@ assert space.str_w(space.call_method(space.wrap("c"), "join", w_l)) == "acb" # # the same for unicode - py.test.skip("disabled") w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) w_l.getitems = None assert space.unicode_w(space.call_method(space.wrap(u"c"), "join", w_l)) == u"acb" @@ -639,7 +636,6 @@ assert space.is_w(space.call_method(space.wrap(" -- "), "join", w_l), w_text) # # the same for unicode - py.test.skip("disabled") w_text = space.wrap(u"text") w_l = self.space.newlist([w_text]) w_l.getitems = None @@ -669,7 +665,6 @@ assert space.listview_bytes(w_l4) == ["a", "b", "c"] def test_unicode_uses_newlist_unicode(self): - py.test.skip("disabled") space = self.space w_u = space.wrap(u"a b c") space.newlist = None @@ -725,7 +720,6 @@ assert self.space.listview_bytes(w_l) == ["a", "b"] def test_listview_unicode_list(self): - py.test.skip("disabled") space = self.space w_l = W_ListObject(space, [space.wrap(u"a"), space.wrap(u"b")]) assert self.space.listview_unicode(w_l) == [u"a", u"b"] diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -116,7 +116,7 @@ charbuf_w = str_w - def listview_unicode(self): + def listview_utf8(self): assert self.is_ascii() return _create_list_from_unicode(self._utf8) @@ -502,9 +502,9 @@ _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): - l = space.listview_unicode(w_list) + l = space.listview_utf8(w_list) if l is not None: - assert False, "unreachable" + xxxx if len(l) == 1: return space.newunicode(l[0]) return space.newunicode(self._utf8).join(l) From pypy.commits at gmail.com Sat Nov 4 18:17:01 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:01 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix enough to pass all the tests in test_unicodeobject Message-ID: <59fe3c5d.530a1c0a.f6334.38f2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92938:94c9ccfbd63c 
Date: 2017-11-04 19:17 +0100 http://bitbucket.org/pypy/pypy/changeset/94c9ccfbd63c/ Log: fix enough to pass all the tests in test_unicodeobject diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -69,7 +69,7 @@ final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, unicodedata_handler=unicodedata_handler) # XXX argh. we want each surrogate to be encoded separately - utf8 = ''.join([u.encode('utf8') for u in result_u]) + utf8 = result_u.encode('utf8') if rutf8.first_non_ascii_char(utf8) == -1: flag = rutf8.FLAG_ASCII elif _has_surrogate(result_u): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -475,12 +475,11 @@ # call the fast version for checking try: lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) - except rutf8.CheckError as e: + except rutf8.CheckError: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases - res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), - errors, final, state.decode_error_handler) - flag = unicodehelper._get_flag(res.decode("utf8")) + res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, + len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: @@ -695,12 +694,11 @@ unicode_name_handler = state.get_unicodedata_handler(space) - result, consumed, lgt = unicodehelper.str_decode_unicode_escape( + result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( string, len(string), errors, final, state.decode_error_handler, unicode_name_handler) - flag = unicodehelper._get_flag(result.decode('utf8')) return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) # ____________________________________________________________ From pypy.commits at gmail.com Sat Nov 4 18:17:08 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:08 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: add assertions for now Message-ID: <59fe3c64.87c7df0a.df252.a6c0@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92941:4bd78617a41a Date: 2017-11-04 20:37 +0100 http://bitbucket.org/pypy/pypy/changeset/4bd78617a41a/ Log: add assertions for now diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -48,6 +48,9 @@ else: assert flag == rutf8.FLAG_REGULAR self._index_storage = rutf8.null_storage() + lgt, flag_check = rutf8.check_utf8(utf8str, True) + assert lgt == length + assert flag == flag_check # the storage can be one of: # - null, unicode with no surrogates # - rutf8.UTF8_HAS_SURROGATES From pypy.commits at gmail.com Sat Nov 4 18:17:09 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:09 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: update TODO Message-ID: <59fe3c65.8faedf0a.fc527.38cf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92942:ec7d2032eb70 Date: 2017-11-04 20:38 +0100 http://bitbucket.org/pypy/pypy/changeset/ec7d2032eb70/ Log: update TODO diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -5,7 +5,5 @@ * fix cpyext * write the correct jit_elidable in _get_index_storage * better flag handling in split/splitlines maybe? 
-* find all the fast-paths that we want to do with utf8 (we only do - utf-8 now, not UTF8 or utf8) for decode/encode * encode_error_handler has XXX -* reenable list strategies for ascii-only unicode +* remove assertions from W_UnicodeObject.__init__ if all the builders pass From pypy.commits at gmail.com Sat Nov 4 18:17:05 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:05 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fight until the strategies seem to work again for ascii unicode strings at least Message-ID: <59fe3c61.28361c0a.a0b50.ac26@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92940:1645f5285398 Date: 2017-11-04 20:32 +0100 http://bitbucket.org/pypy/pypy/changeset/1645f5285398/ Log: fight until the strategies seem to work again for ascii unicode strings at least diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -3,7 +3,7 @@ from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES -from rpython.rlib import jit, types +from rpython.rlib import jit, types, rutf8 from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.objectmodel import (we_are_translated, newlist_hint, compute_unique_id, specialize, not_rpython) @@ -1084,8 +1084,12 @@ def newlist_bytes(self, list_s): return self.newlist([self.newbytes(s) for s in list_s]) - def newlist_unicode(self, list_u): - return self.newlist([self.newunicode(u) for u in list_u]) + def newlist_utf8(self, list_u, is_ascii): + l_w = [None] * len(list_u) + for i, item in enumerate(list_u): + length, flag = rutf8.check_utf8(item, True) + l_w[i] = self.newutf8(item, length, flag) + return self.newlist(l_w) def newlist_int(self, list_i): return self.newlist([self.newint(i) for i in list_i]) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1,6 +1,6 @@ """The builtin dict implementation""" -from rpython.rlib import jit, rerased, objectmodel +from rpython.rlib import jit, rerased, objectmodel, rutf8 from rpython.rlib.debug import mark_dict_non_null from rpython.rlib.objectmodel import newlist_hint, r_dict, specialize from rpython.tool.sourcetools import func_renamer, func_with_new_name @@ -441,7 +441,7 @@ popitem delitem clear \ length w_keys values items \ iterkeys itervalues iteritems \ - listview_bytes listview_unicode listview_int \ + listview_bytes listview_utf8 listview_int \ view_as_kwargs".split() def make_method(method): @@ -593,7 +593,7 @@ def listview_bytes(self, w_dict): return None - def listview_unicode(self, w_dict): + def listview_utf8(self, w_dict): return None def listview_int(self, w_dict): @@ -640,7 +640,7 @@ if type(w_key) is self.space.StringObjectCls: self.switch_to_bytes_strategy(w_dict) return - elif type(w_key) is self.space.UnicodeObjectCls: + elif type(w_key) is self.space.UnicodeObjectCls and w_key.is_ascii(): self.switch_to_unicode_strategy(w_dict) return w_type = self.space.type(w_key) @@ -1197,14 +1197,14 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newunicode(unwrapped) + return self.space.newutf8(unwrapped, len(unwrapped), rutf8.FLAG_ASCII) def unwrap(self, wrapped): - return self.space.unicode_w(wrapped) + return self.space.utf8_w(wrapped) def is_correct_type(self, w_obj): space = self.space - return space.is_w(space.type(w_obj), space.w_unicode) + return 
type(w_obj) is space.UnicodeObjectCls and w_obj.is_ascii() def get_empty_storage(self): res = {} @@ -1232,14 +1232,14 @@ ## assert key is not None ## return self.unerase(w_dict.dstorage).get(key, None) - def listview_unicode(self, w_dict): + def listview_utf8(self, w_dict): return self.unerase(w_dict.dstorage).keys() ## def w_keys(self, w_dict): ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newunicode(key) + return space.newutf8(key, len(key), rutf8.FLAG_ASCII) ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -196,7 +196,7 @@ return W_ListObject.from_storage_and_strategy(space, storage, strategy) @staticmethod - def newlist_unicode(space, list_u): + def newlist_utf8(space, list_u): strategy = space.fromcache(UnicodeListStrategy) storage = strategy.erase(list_u) return W_ListObject.from_storage_and_strategy(space, storage, strategy) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -309,19 +309,10 @@ newlist_text = newlist_bytes - def newlist_unicode(self, list_u): - xxx - return self.newlist(list_u) - return W_ListObject.newlist_unicode(self, list_u) - - def newlist_utf8(self, lst): - res_w = [] - for utf in lst: - assert utf is not None - assert isinstance(utf, str) - length, flag = rutf8.check_utf8(utf, allow_surrogates=True) - res_w.append(self.newutf8(utf, length, flag)) - return self.newlist(res_w) + def newlist_utf8(self, list_u, is_ascii): + if is_ascii: + return W_ListObject.newlist_utf8(self, list_u) + return ObjSpace.newlist_utf8(self, list_u, False) def newlist_int(self, list_i): return W_ListObject.newlist_int(self, list_i) @@ -515,9 +506,9 @@ if type(w_obj) is W_ListObject: return w_obj.getitems_utf8() if type(w_obj) is W_DictObject: - return w_obj.listview_unicode() + return w_obj.listview_utf8() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: - return w_obj.listview_unicode() + return w_obj.listview_utf8() if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) and w_obj.is_ascii()): return w_obj.listview_utf8() diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -12,7 +12,7 @@ from rpython.rlib.objectmodel import iterkeys_with_hash, contains_with_hash from rpython.rlib.objectmodel import setitem_with_hash, delitem_with_hash from rpython.rlib.rarithmetic import intmask, r_uint -from rpython.rlib import rerased, jit +from rpython.rlib import rerased, jit, rutf8 UNROLL_CUTOFF = 5 @@ -86,9 +86,9 @@ """ If this is a string set return its contents as a list of uwnrapped strings. Otherwise return None. """ return self.strategy.listview_bytes(self) - def listview_unicode(self): + def listview_utf8(self): """ If this is a unicode set return its contents as a list of uwnrapped unicodes. Otherwise return None. """ - return self.strategy.listview_unicode(self) + return self.strategy.listview_utf8(self) def listview_int(self): """ If this is an int set return its contents as a list of uwnrapped ints. Otherwise return None. 
""" @@ -690,7 +690,7 @@ def listview_bytes(self, w_set): return None - def listview_unicode(self, w_set): + def listview_utf8(self, w_set): return None def listview_int(self, w_set): @@ -795,8 +795,8 @@ strategy = self.space.fromcache(IntegerSetStrategy) elif type(w_key) is W_BytesObject: strategy = self.space.fromcache(BytesSetStrategy) - #elif type(w_key) is W_UnicodeObject: - # strategy = self.space.fromcache(UnicodeSetStrategy) + elif type(w_key) is W_UnicodeObject and w_key.is_ascii(): + strategy = self.space.fromcache(UnicodeSetStrategy) elif self.space.type(w_key).compares_by_identity(): strategy = self.space.fromcache(IdentitySetStrategy) else: @@ -1272,11 +1272,11 @@ def get_empty_dict(self): return {} - def listview_unicode(self, w_set): + def listview_utf8(self, w_set): return self.unerase(w_set.sstorage).keys() def is_correct_type(self, w_key): - return type(w_key) is W_UnicodeObject + return type(w_key) is W_UnicodeObject and w_key.is_ascii() def may_contain_equal_elements(self, strategy): if strategy is self.space.fromcache(IntegerSetStrategy): @@ -1495,7 +1495,7 @@ def next_entry(self): for key in self.iterator: - return self.space.newunicode(key) + return self.space.newutf8(key, len(key), rutf8.FLAG_ASCII) else: return None @@ -1636,13 +1636,13 @@ return # check for unicode - #for w_item in iterable_w: - # if type(w_item) is not W_UnicodeObject: - # break - #else: - # w_set.strategy = space.fromcache(UnicodeSetStrategy) - # w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w) - # return + for w_item in iterable_w: + if type(w_item) is not W_UnicodeObject or not w_item.is_ascii(): + break + else: + w_set.strategy = space.fromcache(UnicodeSetStrategy) + w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w) + return # check for compares by identity for w_item in iterable_w: diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -142,11 +142,10 @@ assert self.space.listview_bytes(w_d) == ["a", "b"] def test_listview_unicode_dict(self): - py.test.skip("listview_unicode disabled") w = self.space.wrap w_d = self.space.newdict() w_d.initialize_content([(w(u"a"), w(1)), (w(u"b"), w(2))]) - assert self.space.listview_unicode(w_d) == [u"a", u"b"] + assert self.space.listview_utf8(w_d) == ["a", "b"] def test_listview_int_dict(self): w = self.space.wrap diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -675,10 +675,10 @@ w_l4 = space.call_method(w_u, "rsplit", space.wrap(" ")) finally: del space.newlist - assert space.listview_unicode(w_l) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l2) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l3) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l4) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l2) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l3) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l4) == [u"a", u"b", u"c"] def test_pop_without_argument_is_fast(self): space = self.space @@ -722,7 +722,7 @@ def test_listview_unicode_list(self): space = self.space w_l = W_ListObject(space, [space.wrap(u"a"), space.wrap(u"b")]) - assert self.space.listview_unicode(w_l) == [u"a", u"b"] + assert 
self.space.listview_utf8(w_l) == [u"a", u"b"] def test_listview_int_list(self): space = self.space diff --git a/pypy/objspace/std/test/test_setstrategies.py b/pypy/objspace/std/test/test_setstrategies.py --- a/pypy/objspace/std/test/test_setstrategies.py +++ b/pypy/objspace/std/test/test_setstrategies.py @@ -42,7 +42,6 @@ assert s1.strategy is self.space.fromcache(ObjectSetStrategy) def test_switch_to_unicode(self): - py.test.skip("disabled") s = W_SetObject(self.space, self.wrapped([])) s.add(self.space.wrap(u"six")) assert s.strategy is self.space.fromcache(UnicodeSetStrategy) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -28,7 +28,7 @@ def test_listview_unicode(self): w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) - assert self.space.listview_unicode(w_str) == list(u"abcd") + assert self.space.listview_utf8(w_str) == list("abcd") def test_new_shortcut(self): space = self.space diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -196,10 +196,6 @@ def _islinebreak(self, s, pos): return rutf8.islinebreak(s, pos) - def _newlist_unwrapped(self, space, lst): - assert False, "should not be called" - return space.newlist_unicode(lst) - @staticmethod @unwrap_spec(w_string=WrappedDefault("")) def descr_new(space, w_unicodetype, w_string, w_encoding=None, @@ -503,11 +499,11 @@ _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): l = space.listview_utf8(w_list) - if l is not None: - xxxx + if l is not None and self.is_ascii(): if len(l) == 1: - return space.newunicode(l[0]) - return space.newunicode(self._utf8).join(l) + return space.newutf8(l[0], len(l[0]), rutf8.FLAG_ASCII) + s = self._utf8.join(l) + return space.newutf8(s, len(s), rutf8.FLAG_ASCII) return self._StringMethods_descr_join(space, w_list) def _join_return_one(self, space, w_obj): @@ -755,14 +751,14 @@ value = self._utf8 if space.is_none(w_sep): res = split(value, maxsplit=maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) by = self.convert_arg_to_w_unicode(space, w_sep)._utf8 if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = split(value, by, maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) @unwrap_spec(maxsplit=int) def descr_rsplit(self, space, w_sep=None, maxsplit=-1): @@ -770,14 +766,14 @@ value = self._utf8 if space.is_none(w_sep): res = rsplit(value, maxsplit=maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) by = self.convert_arg_to_w_unicode(space, w_sep)._utf8 if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = rsplit(value, by, maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) def descr_getitem(self, space, w_index): if isinstance(w_index, W_SliceObject): From pypy.commits at gmail.com Sat Nov 4 18:17:11 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:11 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: ups, fix Message-ID: <59fe3c67.0e781c0a.320bc.b089@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92943:10e8aaa42286 Date: 2017-11-04 20:46 +0100 http://bitbucket.org/pypy/pypy/changeset/10e8aaa42286/ Log: 
ups, fix diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1288,10 +1288,10 @@ return True def unwrap(self, w_item): - return self.space.unicode_w(w_item) + return self.space.utf8_w(w_item) def wrap(self, item): - return self.space.newunicode(item) + return self.space.newutf8(item, len(item), rutf8.FLAG_ASCII) def iter(self, w_set): return UnicodeIteratorImplementation(self.space, self, w_set) From pypy.commits at gmail.com Sun Nov 5 05:34:05 2017 From: pypy.commits at gmail.com (antocuni) Date: Sun, 05 Nov 2017 02:34:05 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: try to install vmprof and see what happens to test_enable and test_native Message-ID: <59fee91d.759adf0a.8ffa5.193e@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92944:4134cbd25c42 Date: 2017-11-05 11:33 +0100 http://bitbucket.org/pypy/pypy/changeset/4134cbd25c42/ Log: try to install vmprof and see what happens to test_enable and test_native diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis From pypy.commits at gmail.com Sun Nov 5 07:33:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 04:33:17 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix rpython/memory/ tests Message-ID: <59ff050d.1bb3df0a.b3075.10c0@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92945:b7b55dee74c6 Date: 2016-11-29 21:41 +0000 http://bitbucket.org/pypy/pypy/changeset/b7b55dee74c6/ Log: Fix rpython/memory/ tests diff --git a/rpython/memory/test/test_hybrid_gc.py b/rpython/memory/test/test_hybrid_gc.py --- a/rpython/memory/test/test_hybrid_gc.py +++ b/rpython/memory/test/test_hybrid_gc.py @@ -2,6 +2,7 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.rlib.objectmodel import assert_ from rpython.memory.test import test_generational_gc @@ -35,12 +36,12 @@ while i < x: gc.collect() i += 1 - assert ref() is a - assert ref().x == 42 + assert_(ref() is a) + assert_(ref().x == 42) return ref def step2(ref): gc.collect() # 'a' is freed here - assert ref() is None + assert_(ref() is None) def f(x): ref = step1(x) step2(ref) diff --git a/rpython/memory/test/test_incminimark_gc.py b/rpython/memory/test/test_incminimark_gc.py --- a/rpython/memory/test/test_incminimark_gc.py +++ b/rpython/memory/test/test_incminimark_gc.py @@ -2,6 +2,7 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rlib import rgc +from rpython.rlib.objectmodel import assert_ from rpython.memory.test import test_minimark_gc @@ -21,8 +22,8 @@ a.x = 5 wr = weakref.ref(a) llop.gc__collect(lltype.Void) # make everything old - assert wr() is not None - assert a.x == 5 + assert_(wr() is not None) + assert_(a.x == 5) return wr def f(): ref = g() @@ -31,7 +32,7 @@ # to an object not found, but still reachable: b = ref() llop.debug_print(lltype.Void, b) - assert b is not None + assert_(b is not None) llop.gc__collect(lltype.Void) # finish the major cycle # assert does not crash, because 'b' is still kept alive b.x = 42 @@ -46,7 +47,7 @@ def f(): a = A() ref = weakref.ref(a) - assert not rgc.pin(ref) + assert_(not rgc.pin(ref)) self.interpret(f, []) def 
test_pin_finalizer_not_implemented(self): @@ -63,8 +64,8 @@ def f(): a = A() b = B() - assert not rgc.pin(a) - assert not rgc.pin(b) + assert_(not rgc.pin(a)) + assert_(not rgc.pin(b)) self.interpret(f, []) def test_weakref_to_pinned(self): @@ -75,18 +76,18 @@ pass def g(): a = A() - assert rgc.pin(a) + assert_(rgc.pin(a)) a.x = 100 wr = weakref.ref(a) llop.gc__collect(lltype.Void) - assert wr() is not None - assert a.x == 100 + assert_(wr() is not None) + assert_(a.x == 100) return wr def f(): ref = g() llop.gc__collect(lltype.Void, 1) b = ref() - assert b is not None + assert_(b is not None) b.x = 101 return ref() is b res = self.interpret(f, []) diff --git a/rpython/memory/test/test_transformed_gc.py b/rpython/memory/test/test_transformed_gc.py --- a/rpython/memory/test/test_transformed_gc.py +++ b/rpython/memory/test/test_transformed_gc.py @@ -15,6 +15,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.rarithmetic import LONG_BIT from rpython.rtyper.rtyper import llinterp_backend +from rpython.rlib.objectmodel import assert_ WORD = LONG_BIT // 8 @@ -804,7 +805,7 @@ [A() for i in range(20)] i = 0 while i < len(alist): - assert idarray[i] == compute_unique_id(alist[i]) + assert_(idarray[i] == compute_unique_id(alist[i])) i += 1 j += 1 lltype.free(idarray, flavor='raw') @@ -855,7 +856,7 @@ if cls.gcname == 'incminimark': marker = cls.marker def cleanup(): - assert marker[0] > 0 + assert_(marker[0] > 0) marker[0] = 0 else: cleanup = None @@ -987,7 +988,7 @@ for i in range(20): x.append((1, lltype.malloc(S))) for i in range(50): - assert l2[i] == l[50 + i] + assert_(l2[i] == l[50 + i]) return 0 return fn @@ -1036,9 +1037,9 @@ while i < x: all[i] = [i] * i i += 1 - assert ref() is a + assert_(ref() is a) llop.gc__collect(lltype.Void) - assert ref() is a + assert_(ref() is a) return a.foo + len(all) return f @@ -1115,7 +1116,7 @@ i = 0 while i < 17: ref = weakref.ref(a) - assert ref() is a + assert_(ref() is a) i += 1 return 0 @@ -1182,9 +1183,9 @@ a1 = A() nf1 = nf_a.address[0] nt1 = nt_a.address[0] - assert nf1 > nf0 - assert nt1 > nf1 - assert nt1 == nt0 + assert_(nf1 > nf0) + assert_(nt1 > nf1) + assert_(nt1 == nt0) return 0 return f @@ -1359,7 +1360,7 @@ hashes.append(compute_identity_hash(obj)) unique = {} for i in range(len(objects)): - assert compute_identity_hash(objects[i]) == hashes[i] + assert_(compute_identity_hash(objects[i]) == hashes[i]) unique[hashes[i]] = None return len(unique) return fn diff --git a/rpython/rtyper/test/test_exception.py b/rpython/rtyper/test/test_exception.py --- a/rpython/rtyper/test/test_exception.py +++ b/rpython/rtyper/test/test_exception.py @@ -49,39 +49,39 @@ try: g(n) except IOError as e: - assert e.errno == 0 - assert e.strerror == "test" - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror == "test") + assert_(e.filename is None) else: - assert False + assert_(False) try: h(n) except OSError as e: - assert e.errno == 42 - assert e.strerror == "?" - assert e.filename is None + assert_(e.errno == 42) + assert_(e.strerror == "?") + assert_(e.filename is None) else: - assert False + assert_(False) try: i(n) except EnvironmentError as e: - assert e.errno == 42 - assert e.strerror == "?" 
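            # Note added in editing (not part of the committed patch): across
            # the assert-rewrite branch, plain `assert` statements in tests
            # that also run translated are mechanically replaced by the
            # assert_() helper from rpython.rlib.objectmodel (the surrounding
            # hunks add the import), so the checks stay ordinary function
            # calls rather than assert statements, which the RPython toolchain
            # can treat specially.  The helper's exact definition is not shown
            # in this digest; presumably it amounts to something like:
            #     def assert_(condition, msg="assertion failed"):
            #         if not condition:
            #             raise AssertionError(msg)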
- assert e.filename == "test" + assert_(e.errno == 42) + assert_(e.strerror == "?") + assert_(e.filename == "test") else: - assert False + assert_(False) try: j(n) except (IOError, OSError) as e: - assert e.errno == 0 - assert e.strerror == "test" - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror == "test") + assert_(e.filename is None) try: k(n) except EnvironmentError as e: - assert e.errno == 0 - assert e.strerror is None - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror is None) + assert_(e.filename is None) self.interpret(f, [42]) def test_catch_incompatible_class(self): @@ -91,7 +91,7 @@ pass def f(n): try: - assert n < 10 + assert_(n < 10) except MyError as operr: h(operr) res = self.interpret(f, [7]) From pypy.commits at gmail.com Sun Nov 5 09:48:01 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 06:48:01 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix asserts in rpython/rtyper/ Message-ID: <59ff24a1.3bb0df0a.1515b.83b2@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92947:b994fd06043b Date: 2017-11-05 14:43 +0000 http://bitbucket.org/pypy/pypy/changeset/b994fd06043b/ Log: Fix asserts in rpython/rtyper/ diff --git a/rpython/rtyper/lltypesystem/test/test_llarena.py b/rpython/rtyper/lltypesystem/test/test_llarena.py --- a/rpython/rtyper/lltypesystem/test/test_llarena.py +++ b/rpython/rtyper/lltypesystem/test/test_llarena.py @@ -1,5 +1,6 @@ import py, os +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, llarena from rpython.rtyper.lltypesystem.llarena import (arena_malloc, arena_reset, arena_reserve, arena_free, round_up_for_allocation, ArenaError, @@ -143,12 +144,12 @@ b = a + round_up_for_allocation(llmemory.sizeof(lltype.Char)) arena_reserve(b, precomputed_size) (b + llmemory.offsetof(SX, 'x')).signed[0] = 123 - assert llmemory.cast_adr_to_ptr(b, SPTR).x == 123 + assert_(llmemory.cast_adr_to_ptr(b, SPTR).x == 123) llmemory.cast_adr_to_ptr(b, SPTR).x += 1 - assert (b + llmemory.offsetof(SX, 'x')).signed[0] == 124 + assert_((b + llmemory.offsetof(SX, 'x')).signed[0] == 124) arena_reset(a, myarenasize, True) arena_reserve(b, round_up_for_allocation(llmemory.sizeof(SX))) - assert llmemory.cast_adr_to_ptr(b, SPTR).x == 0 + assert_(llmemory.cast_adr_to_ptr(b, SPTR).x == 0) arena_free(a) return 42 @@ -334,7 +335,7 @@ arena_reserve(a, llmemory.sizeof(S)) p = llmemory.cast_adr_to_ptr(a + 23432, lltype.Ptr(S)) p.x = 123 - assert p.x == 123 + assert_(p.x == 123) arena_protect(a, 65536, True) result = 0 if testrun == 1: diff --git a/rpython/rtyper/lltypesystem/test/test_llgroup.py b/rpython/rtyper/lltypesystem/test/test_llgroup.py --- a/rpython/rtyper/lltypesystem/test/test_llgroup.py +++ b/rpython/rtyper/lltypesystem/test/test_llgroup.py @@ -1,3 +1,4 @@ +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.llgroup import * from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rtyper.test.test_llinterp import interpret @@ -76,37 +77,37 @@ # def f(): p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, test.g1a) - assert p == test.p1a + assert_(p == test.p1a) p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, test.g1b) - assert p == test.p1b + assert_(p == test.p1b) p = llop.get_group_member(lltype.Ptr(test.S2), grpptr, test.g2a) - assert p == test.p2a + assert_(p == test.p2a) p = llop.get_group_member(lltype.Ptr(test.S2), grpptr, test.g2b) - assert p == test.p2b + assert_(p == test.p2b) # p 
= llop.get_next_group_member(lltype.Ptr(test.S2), grpptr, test.g1a, llmemory.sizeof(test.S1)) - assert p == test.p2a + assert_(p == test.p2a) p = llop.get_next_group_member(lltype.Ptr(test.S2), grpptr, test.g2a, llmemory.sizeof(test.S2)) - assert p == test.p2b + assert_(p == test.p2b) p = llop.get_next_group_member(lltype.Ptr(test.S1), grpptr, test.g2b, llmemory.sizeof(test.S2)) - assert p == test.p1b + assert_(p == test.p1b) # expected = [123, 456] for i in range(2): p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, g1x[i]) - assert p.x == expected[i] + assert_(p.x == expected[i]) # for i in range(2): s = llop.extract_ushort(HALFWORD, cslist[i]) p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, s) - assert p == test.p1b - assert cslist[0] & ~MASK == 0x45 << HALFSHIFT - assert cslist[1] & ~MASK == 0x41 << HALFSHIFT - assert cslist[0] >> HALFSHIFT == 0x45 - assert cslist[1] >> (HALFSHIFT+1) == 0x41 >> 1 + assert_(p == test.p1b) + assert_(cslist[0] & ~MASK == 0x45 << HALFSHIFT) + assert_(cslist[1] & ~MASK == 0x41 << HALFSHIFT) + assert_(cslist[0] >> HALFSHIFT == 0x45) + assert_(cslist[1] >> (HALFSHIFT+1) == 0x41 >> 1) # return 42 return f diff --git a/rpython/rtyper/lltypesystem/test/test_llmemory.py b/rpython/rtyper/lltypesystem/test/test_llmemory.py --- a/rpython/rtyper/lltypesystem/test/test_llmemory.py +++ b/rpython/rtyper/lltypesystem/test/test_llmemory.py @@ -1,3 +1,4 @@ +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.llmemory import * from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.test.test_llinterp import interpret @@ -40,7 +41,7 @@ assert b.signed[0] == 123 b.signed[0] = 234 assert s2.s.x == 234 - + def test_array(): A = lltype.GcArray(lltype.Signed) x = lltype.malloc(A, 5) @@ -85,7 +86,7 @@ o = AddressOffset() py.test.raises(TypeError, "1 + o") py.test.raises(TypeError, "o + 1") - + def test_sizeof(): # this is mostly an "assert not raises" sort of test array = lltype.Array(lltype.Signed) @@ -421,7 +422,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + T = lltype.GcStruct('T', ('s', S)) adr = raw_malloc(sizeof(T)) p_s = cast_adr_to_ptr(adr, lltype.Ptr(S)) @@ -431,7 +432,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + U = lltype.Struct('U', ('y', lltype.Signed)) T = lltype.GcStruct('T', ('x', lltype.Signed), ('u', U)) adr = raw_malloc(sizeof(T)) @@ -446,10 +447,10 @@ def test_raw_free_with_hdr(): from rpython.memory.gcheader import GCHeaderBuilder - + HDR = lltype.Struct('h', ('t', lltype.Signed)) gh = GCHeaderBuilder(HDR).size_gc_header - + A = lltype.GcArray(lltype.Signed) adr = raw_malloc(gh+sizeof(A, 10)) p_a = cast_adr_to_ptr(adr+gh, lltype.Ptr(A)) @@ -471,7 +472,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + T = lltype.GcStruct('T', ('s', S)) adr = raw_malloc(gh+sizeof(T)) p_s = cast_adr_to_ptr(adr+gh, lltype.Ptr(S)) @@ -482,7 +483,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + U = lltype.Struct('U', ('y', lltype.Signed)) T = lltype.GcStruct('T', ('x', lltype.Signed), ('u', U)) adr = raw_malloc(gh+sizeof(T)) @@ -656,6 +657,6 @@ ptr = lltype.malloc(A, 10) gcref = lltype.cast_opaque_ptr(GCREF, ptr) adr = lltype.cast_ptr_to_int(gcref) - assert adr == lltype.cast_ptr_to_int(ptr) + assert_(adr == lltype.cast_ptr_to_int(ptr)) f() interpret(f, []) diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ 
b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -1,6 +1,7 @@ import py import sys +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.rffi import * from rpython.rtyper.lltypesystem.rffi import _keeper_for_type # crap from rpython.rlib.rposix import get_saved_errno, set_saved_errno @@ -611,9 +612,9 @@ p1bis = make(X1) p2bis = make(X2) structcopy(p1bis, p1) - assert p1bis.a == 5 - assert p1bis.x2.x == 456 - assert p1bis.p == p2 + assert_(p1bis.a == 5) + assert_(p1bis.x2.x == 456) + assert_(p1bis.p == p2) structcopy(p2bis, p2) res = p2bis.x lltype.free(p2bis, flavor='raw') @@ -697,11 +698,11 @@ def f(): raw = str2charp("XxxZy") n = str2chararray("abcdef", raw, 4) - assert raw[0] == 'a' - assert raw[1] == 'b' - assert raw[2] == 'c' - assert raw[3] == 'd' - assert raw[4] == 'y' + assert_(raw[0] == 'a') + assert_(raw[1] == 'b') + assert_(raw[2] == 'c') + assert_(raw[3] == 'd') + assert_(raw[4] == 'y') lltype.free(raw, flavor='raw') return n @@ -796,9 +797,9 @@ for i in xrange(len(data)): a[i] = data[i] a2 = ptradd(a, 2) - assert lltype.typeOf(a2) == lltype.typeOf(a) == lltype.Ptr(ARRAY_OF_CHAR) + assert_(lltype.typeOf(a2) == lltype.typeOf(a) == lltype.Ptr(ARRAY_OF_CHAR)) for i in xrange(len(data) - 2): - assert a2[i] == a[i + 2] + assert_(a2[i] == a[i + 2]) lltype.free(a, flavor='raw') def test_ptradd_interpret(): diff --git a/rpython/rtyper/lltypesystem/test/test_ztranslated.py b/rpython/rtyper/lltypesystem/test/test_ztranslated.py --- a/rpython/rtyper/lltypesystem/test/test_ztranslated.py +++ b/rpython/rtyper/lltypesystem/test/test_ztranslated.py @@ -1,4 +1,5 @@ import gc +from rpython.rlib.objectmodel import assert_ from rpython.translator.c.test.test_genc import compile from rpython.rtyper.lltypesystem import rffi from rpython.rtyper.lltypesystem import lltype @@ -8,7 +9,7 @@ def debug_assert(boolresult, msg): if not boolresult: llop.debug_print(lltype.Void, "\n\nassert failed: %s\n\n" % msg) - assert boolresult + assert_(boolresult) def use_str(): mystr = b'abc' diff --git a/rpython/rtyper/test/test_exception.py b/rpython/rtyper/test/test_exception.py --- a/rpython/rtyper/test/test_exception.py +++ b/rpython/rtyper/test/test_exception.py @@ -1,5 +1,6 @@ import py +from rpython.rlib.objectmodel import assert_ from rpython.translator.translator import TranslationContext from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.llinterp import LLException diff --git a/rpython/rtyper/test/test_llann.py b/rpython/rtyper/test/test_llann.py --- a/rpython/rtyper/test/test_llann.py +++ b/rpython/rtyper/test/test_llann.py @@ -1,6 +1,7 @@ import py from rpython.annotator import model as annmodel +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.llannotation import SomePtr, lltype_to_annotation from rpython.conftest import option from rpython.rtyper.annlowlevel import (annotate_lowlevel_helper, @@ -456,7 +457,7 @@ s.y = y fptr = llhelper(F, f) gptr = llhelper(G, g) - assert typeOf(fptr) == F + assert_(typeOf(fptr) == F) return fptr(s, z)+fptr(s, z*2)+gptr(s) res = interpret(h, [8, 5, 2]) @@ -478,7 +479,7 @@ s.x = x s.y = y fptr = llhelper(F, myfuncs[z]) - assert typeOf(fptr) == F + assert_(typeOf(fptr) == F) return fptr(s) res = interpret(h, [80, 5, 0]) diff --git a/rpython/rtyper/test/test_llinterp.py b/rpython/rtyper/test/test_llinterp.py --- a/rpython/rtyper/test/test_llinterp.py +++ b/rpython/rtyper/test/test_llinterp.py @@ -1,6 +1,7 @@ -from __future__ import with_statement import py import sys + +from rpython.rlib.objectmodel import 
assert_ from rpython.rtyper.lltypesystem.lltype import typeOf, Void, malloc, free from rpython.rtyper.llinterp import LLInterpreter, LLException, log from rpython.rtyper.rmodel import inputconst @@ -571,7 +572,7 @@ with scoped_alloc(T, 1) as array: array[0] = -42 x = array[0] - assert x == -42 + assert_(x == -42) res = interpret(f, []) diff --git a/rpython/rtyper/test/test_nongc.py b/rpython/rtyper/test/test_nongc.py --- a/rpython/rtyper/test/test_nongc.py +++ b/rpython/rtyper/test/test_nongc.py @@ -1,10 +1,10 @@ import py +from rpython.rlib.objectmodel import assert_, free_non_gc_object from rpython.annotator import model as annmodel +from rpython.annotator.annrpython import RPythonAnnotator from rpython.rtyper.llannotation import SomeAddress -from rpython.annotator.annrpython import RPythonAnnotator from rpython.rtyper.rtyper import RPythonTyper -from rpython.rlib.objectmodel import free_non_gc_object from rpython.rtyper.test.test_llinterp import interpret as llinterpret def interpret(f, args): @@ -100,13 +100,13 @@ if i == 0: pass elif i == 1: - assert isinstance(o, A) + assert_(isinstance(o, A)) free_non_gc_object(o) elif i == 2: - assert isinstance(o, B) + assert_(isinstance(o, B)) free_non_gc_object(o) else: - assert isinstance(o, C) + assert_(isinstance(o, C)) free_non_gc_object(o) return res diff --git a/rpython/rtyper/test/test_rclass.py b/rpython/rtyper/test/test_rclass.py --- a/rpython/rtyper/test/test_rclass.py +++ b/rpython/rtyper/test/test_rclass.py @@ -4,6 +4,7 @@ from rpython.flowspace.model import summary from rpython.rlib.rarithmetic import r_longlong +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import (typeOf, Signed, getRuntimeTypeInfo, identityhash) from rpython.rtyper.error import TyperError @@ -1248,13 +1249,13 @@ self.data[i] = v def __getslice__(self, start, stop): - assert start >= 0 - assert stop >= 0 + assert_(start >= 0) + assert_(stop >= 0) return self.data[start:stop] def __setslice__(self, start, stop, v): - assert start >= 0 - assert stop >= 0 + assert_(start >= 0) + assert_(stop >= 0) i = 0 for n in range(start, stop): self.data[n] = v[i] diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -2,22 +2,24 @@ from contextlib import contextmanager import signal -from rpython.translator.translator import TranslationContext -from rpython.annotator.model import ( - SomeInteger, SomeString, SomeChar, SomeUnicodeString, SomeUnicodeCodePoint) -from rpython.annotator.dictdef import DictKey, DictValue -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rtyper.lltypesystem import rdict -from rpython.rtyper.test.tool import BaseRtypingTest -from rpython.rlib.objectmodel import r_dict -from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong - import py from hypothesis import settings from hypothesis.strategies import ( builds, sampled_from, binary, just, integers, text, characters, tuples) from hypothesis.stateful import GenericStateMachine, run_state_machine_as_test +from rpython.translator.translator import TranslationContext +from rpython.annotator.model import ( + SomeInteger, SomeString, SomeChar, SomeUnicodeString, SomeUnicodeCodePoint) +from rpython.annotator.dictdef import DictKey, DictValue +from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong +from rpython.rlib.objectmodel import assert_ +from rpython.rtyper.lltypesystem import lltype, rffi +from 
rpython.rtyper.lltypesystem import rdict +from rpython.rtyper.test.tool import BaseRtypingTest +from rpython.rlib.objectmodel import r_dict + + def ann2strategy(s_value): if isinstance(s_value, SomeChar): return builds(chr, integers(min_value=0, max_value=255)) @@ -192,7 +194,7 @@ for value in d.itervalues(): k2 = k2 * value for key, value in d.iteritems(): - assert d[key] == value + assert_(d[key] == value) k3 = k3 * value return k1 + k2 + k3 res = self.interpret(func, []) @@ -702,15 +704,15 @@ d[5] = 2 d[6] = 3 k1, v1 = d.popitem() - assert len(d) == 1 + assert_(len(d) == 1) k2, v2 = d.popitem() try: d.popitem() except KeyError: pass else: - assert 0, "should have raised KeyError" - assert len(d) == 0 + assert_(0, "should have raised KeyError") + assert_(len(d) == 0) return k1*1000 + v1*100 + k2*10 + v2 res = self.interpret(func, []) @@ -960,15 +962,15 @@ d[5] = 2 d[6] = 3 k1, v1 = d.popitem() - assert len(d) == 1 + assert_(len(d) == 1) k2, v2 = d.popitem() try: d.popitem() except KeyError: pass else: - assert 0, "should have raised KeyError" - assert len(d) == 0 + assert_(0, "should have raised KeyError") + assert_(len(d) == 0) return k1*1000 + v1*100 + k2*10 + v2 res = self.interpret(func, []) diff --git a/rpython/rtyper/test/test_rint.py b/rpython/rtyper/test/test_rint.py --- a/rpython/rtyper/test/test_rint.py +++ b/rpython/rtyper/test/test_rint.py @@ -1,10 +1,12 @@ import py -import sys, operator +import sys +import operator + from rpython.translator.translator import TranslationContext +from rpython.rlib.objectmodel import assert_, compute_hash from rpython.rtyper.test import snippet from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong from rpython.rlib.rarithmetic import ovfcheck, r_int64, intmask, int_between -from rpython.rlib import objectmodel from rpython.rtyper.test.tool import BaseRtypingTest from rpython.flowspace.model import summary @@ -392,16 +394,16 @@ def test_int_py_div_nonnegargs(self): def f(x, y): - assert x >= 0 - assert y >= 0 + assert_(x >= 0) + assert_(y >= 0) return x // y res = self.interpret(f, [1234567, 123]) assert res == 1234567 // 123 def test_int_py_mod_nonnegargs(self): def f(x, y): - assert x >= 0 - assert y >= 0 + assert_(x >= 0) + assert_(y >= 0) return x % y res = self.interpret(f, [1234567, 123]) assert res == 1234567 % 123 @@ -418,7 +420,7 @@ def test_hash(self): def f(x): - return objectmodel.compute_hash(x) + return compute_hash(x) res = self.interpret(f, [123456789]) assert res == 123456789 res = self.interpret(f, [r_int64(123456789012345678)]) diff --git a/rpython/rtyper/test/test_rlist.py b/rpython/rtyper/test/test_rlist.py --- a/rpython/rtyper/test/test_rlist.py +++ b/rpython/rtyper/test/test_rlist.py @@ -3,11 +3,13 @@ import py +from rpython.rlib.objectmodel import assert_, newlist_hint, resizelist_hint from rpython.rtyper.debug import ll_assert from rpython.rtyper.error import TyperError from rpython.rtyper.llinterp import LLException, LLAssertFailure from rpython.rtyper.lltypesystem import rlist as ll_rlist -from rpython.rtyper.lltypesystem.rlist import ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist +from rpython.rtyper.lltypesystem.rlist import ( + ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist) from rpython.rtyper.rint import signed_repr from rpython.rtyper.rlist import * from rpython.rtyper.test.tool import BaseRtypingTest @@ -959,7 +961,7 @@ x = l.pop() x = l.pop() x = l2.pop() - return str(x)+";"+str(l) + return str(x) + ";" + str(l) res = self.ll_to_string(self.interpret(fn, [])) res = 
res.replace('rpython.rtyper.test.test_rlist.', '') res = re.sub(' at 0x[a-z0-9]+', '', res) @@ -1167,7 +1169,7 @@ lst = [fr, fr] lst.append(fr) del lst[1] - assert lst[0] is fr + assert_(lst[0] is fr) return len(lst) res = self.interpret(f, []) assert res == 2 @@ -1202,9 +1204,9 @@ def test_list_equality(self): def dummyfn(n): lst = [12] * n - assert lst == [12, 12, 12] + assert_(lst == [12, 12, 12]) lst2 = [[12, 34], [5], [], [12, 12, 12], [5]] - assert lst in lst2 + assert_(lst in lst2) self.interpret(dummyfn, [3]) def test_list_remove(self): @@ -1215,7 +1217,6 @@ res = self.interpret(dummyfn, [1, 0]) assert res == 0 - def test_getitem_exc_1(self): def f(x): l = [1] @@ -1339,7 +1340,7 @@ def test_charlist_extension_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = ['a', 'b'] l += s[i:] return ''.join(l) @@ -1349,7 +1350,7 @@ def test_unicharlist_extension_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = [u'a', u'b'] l += s[i:] return ''.join([chr(ord(c)) for c in l]) @@ -1359,7 +1360,7 @@ def test_extend_a_non_char_list_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = ['foo', 'bar'] l += s[i:] # NOT SUPPORTED for now if l is not a list of chars return ''.join(l) @@ -1368,7 +1369,7 @@ def test_charlist_extension_3(self): def f(n, i, j): s = 'hello%d' % n - assert 0 <= i <= j <= len(s) + assert_(0 <= i <= j <= len(s)) l = ['a', 'b'] l += s[i:j] return ''.join(l) @@ -1378,7 +1379,7 @@ def test_unicharlist_extension_3(self): def f(n, i, j): s = 'hello%d' % n - assert 0 <= i <= j <= len(s) + assert_(0 <= i <= j <= len(s)) l = [u'a', u'b'] l += s[i:j] return ''.join([chr(ord(c)) for c in l]) @@ -1491,8 +1492,6 @@ ("y[*]" in immutable_fields) def test_hints(self): - from rpython.rlib.objectmodel import newlist_hint - strings = ['abc', 'def'] def f(i): z = strings[i] @@ -1569,8 +1568,8 @@ def test_no_unneeded_refs(self): def fndel(p, q): lis = ["5", "3", "99"] - assert q >= 0 - assert p >= 0 + assert_(q >= 0) + assert_(p >= 0) del lis[p:q] return lis def fnpop(n): @@ -1677,7 +1676,6 @@ def test_extend_was_not_overallocating(self): from rpython.rlib import rgc - from rpython.rlib.objectmodel import resizelist_hint from rpython.rtyper.lltypesystem import lltype old_arraycopy = rgc.ll_arraycopy try: diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -1,10 +1,10 @@ import py -import random from collections import OrderedDict from hypothesis import settings, given, strategies from hypothesis.stateful import run_state_machine_as_test +from rpython.rlib.objectmodel import assert_, r_ordereddict from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem import rordereddict, rstr from rpython.rlib.rarithmetic import intmask @@ -387,7 +387,7 @@ @staticmethod def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + return r_ordereddict(myeq, myhash) def test_two_dicts_with_different_value_types(self): def func(i): @@ -406,14 +406,14 @@ d1['key2'] = 'value2' for i in range(20): objectmodel.move_to_end(d1, 'key1') - assert d1.keys() == ['key2', 'key1'] + assert_(d1.keys() == ['key2', 'key1']) objectmodel.move_to_end(d1, 'key2') - assert d1.keys() == ['key1', 'key2'] + assert_(d1.keys() == ['key1', 'key2']) for i in range(20): objectmodel.move_to_end(d1, 'key2', last=False) - 
assert d1.keys() == ['key2', 'key1'] + assert_(d1.keys() == ['key2', 'key1']) objectmodel.move_to_end(d1, 'key1', last=False) - assert d1.keys() == ['key1', 'key2'] + assert_(d1.keys() == ['key1', 'key2']) func() self.interpret(func, []) diff --git a/rpython/rtyper/test/test_rpbc.py b/rpython/rtyper/test/test_rpbc.py --- a/rpython/rtyper/test/test_rpbc.py +++ b/rpython/rtyper/test/test_rpbc.py @@ -2,6 +2,7 @@ from rpython.annotator import model as annmodel from rpython.annotator import specialize +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import typeOf from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.llannotation import SomePtr, lltype_to_annotation @@ -1604,7 +1605,7 @@ try: o.m() except KeyError: - assert 0 + raise ValueError return B().m() self.interpret_raises(KeyError, f, [7]) @@ -1717,7 +1718,7 @@ def cb2(): pass def g(cb, result): - assert (cb is None) == (result == 0) + assert_((cb is None) == (result == 0)) def h(cb): cb() def f(): diff --git a/rpython/rtyper/test/test_rptr.py b/rpython/rtyper/test/test_rptr.py --- a/rpython/rtyper/test/test_rptr.py +++ b/rpython/rtyper/test/test_rptr.py @@ -6,9 +6,11 @@ from rpython.rtyper.llannotation import SomePtr from rpython.annotator.annrpython import RPythonAnnotator from rpython.rlib.rarithmetic import is_valid_int +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.annlowlevel import annotate_lowlevel_helper, LowLevelAnnotatorPolicy from rpython.rtyper.lltypesystem import llmemory, lltype from rpython.rtyper.rtyper import RPythonTyper +from rpython.rtyper.test.test_llinterp import interpret # ____________________________________________________________ @@ -50,7 +52,6 @@ assert s == annmodel.SomeTuple([SomePtr(lltype.Ptr(lltype.RuntimeTypeInfo)), annmodel.SomeBool()]) -from rpython.rtyper.test.test_llinterp import interpret, gengraph def test_adtmeths(): policy = LowLevelAnnotatorPolicy() @@ -86,7 +87,6 @@ assert lltype.typeOf(a) == lltype.Ptr(A) assert len(a) == 10 - def f(): a = A.h_alloc(10) return a.h_length() @@ -104,15 +104,15 @@ S = lltype.GcStruct('S', ('t', T)) PT = lltype.Ptr(T) PS = lltype.Ptr(S) + def fn(n): s = lltype.cast_int_to_ptr(PS, n) - assert lltype.typeOf(s) == PS - assert lltype.cast_ptr_to_int(s) == n + assert_(lltype.typeOf(s) == PS) + assert_(lltype.cast_ptr_to_int(s) == n) t = lltype.cast_pointer(PT, s) - assert lltype.typeOf(t) == PT - assert lltype.cast_ptr_to_int(t) == n - assert s == lltype.cast_pointer(PS, t) - + assert_(lltype.typeOf(t) == PT) + assert_(lltype.cast_ptr_to_int(t) == n) + assert_(s == lltype.cast_pointer(PS, t)) interpret(fn, [11521]) def test_odd_ints_opaque(): @@ -120,12 +120,13 @@ Q = lltype.GcOpaqueType('Q') PT = lltype.Ptr(T) PQ = lltype.Ptr(Q) + def fn(n): t = lltype.cast_int_to_ptr(PT, n) - assert lltype.typeOf(t) == PT - assert lltype.cast_ptr_to_int(t) == n + assert_(lltype.typeOf(t) == PT) + assert_(lltype.cast_ptr_to_int(t) == n) o = lltype.cast_opaque_ptr(PQ, t) - assert lltype.cast_ptr_to_int(o) == n + assert_(lltype.cast_ptr_to_int(o) == n) fn(13) interpret(fn, [11521]) @@ -384,6 +385,7 @@ def test_interior_ptr_with_setitem(): T = lltype.GcStruct("T", ('s', lltype.Array(lltype.Signed))) + def f(): t = lltype.malloc(T, 1) t.s[0] = 1 @@ -393,18 +395,21 @@ def test_isinstance_ptr(): S = lltype.GcStruct("S", ('x', lltype.Signed)) + def f(n): x = isinstance(lltype.Signed, lltype.Ptr) return x + (lltype.typeOf(x) is lltype.Ptr(S)) + len(n) + def lltest(): f([]) return f([1]) s, t = ll_rtype(lltest, []) - 
assert s.is_constant() == False + assert s.is_constant() is False def test_staticadtmeths(): ll_func = lltype.staticAdtMethod(lambda x: x + 42) S = lltype.GcStruct('S', adtmeths={'ll_func': ll_func}) + def f(): return lltype.malloc(S).ll_func(5) s, t = ll_rtype(f, []) diff --git a/rpython/rtyper/test/test_rstr.py b/rpython/rtyper/test/test_rstr.py --- a/rpython/rtyper/test/test_rstr.py +++ b/rpython/rtyper/test/test_rstr.py @@ -4,6 +4,7 @@ from rpython.flowspace.model import summary from rpython.annotator.model import AnnotatorError +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import typeOf, Signed, malloc from rpython.rtyper.lltypesystem.rstr import LLHelpers, STR from rpython.rtyper.rstr import AbstractLLHelpers @@ -357,20 +358,24 @@ def test_find_with_start(self): const = self.const + def fn(i): - assert i >= 0 + assert_(i >= 0) return const('ababcabc').find(const('abc'), i) + for i in range(9): res = self.interpret(fn, [i]) assert res == fn(i) def test_find_with_start_end(self): const = self.const + def fn(i, j): - assert i >= 0 - assert j >= 0 + assert_(i >= 0) + assert_(j >= 0) return (const('ababcabc').find(const('abc'), i, j) + const('ababcabc').find(const('b'), i, j) * 100) + for (i, j) in [(1,7), (2,6), (3,7), (3,8), (4,99), (7, 99)]: res = self.interpret(fn, [i, j]) assert res == fn(i, j) @@ -388,14 +393,16 @@ def test_find_empty_string(self): const = self.const + def f(i): - assert i >= 0 + assert_(i >= 0) s = const("abc") x = s.find(const('')) x+= s.find(const(''), i)*10 x+= s.find(const(''), i, i)*100 x+= s.find(const(''), i, i+1)*1000 return x + for i, expected in enumerate([0, 1110, 2220, 3330, -1110, -1110]): res = self.interpret(f, [i]) assert res == expected @@ -418,14 +425,16 @@ def test_rfind_empty_string(self): const = self.const + def f(i): - assert i >= 0 + assert_(i >= 0) s = const("abc") x = s.rfind(const('')) x+= s.rfind(const(''), i)*10 x+= s.rfind(const(''), i, i)*100 x+= s.rfind(const(''), i, i+1)*1000 return x + for i, expected in enumerate([1033, 2133, 3233, 3333, 3-1110, 3-1110]): res = self.interpret(f, [i]) assert res == expected @@ -557,7 +566,7 @@ def fn(i): c = ["a", "b", "c"] - assert i >= 0 + assert_(i >= 0) return const('').join(c[i:]) res = self.interpret(fn, [0]) assert self.ll_to_string(res) == const("abc") diff --git a/rpython/rtyper/test/test_rtuple.py b/rpython/rtyper/test/test_rtuple.py --- a/rpython/rtyper/test/test_rtuple.py +++ b/rpython/rtyper/test/test_rtuple.py @@ -1,11 +1,11 @@ import py +from rpython.rlib.objectmodel import assert_, compute_hash from rpython.rtyper.rtuple import TUPLE_TYPE, TupleRepr from rpython.rtyper.lltypesystem.lltype import Signed, Bool from rpython.rtyper.rbool import bool_repr from rpython.rtyper.rint import signed_repr from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.error import TyperError -from rpython.rlib.objectmodel import compute_hash from rpython.translator.translator import TranslationContext @@ -290,7 +290,7 @@ res = [] for x in lst: res.append(list(x)) - assert res[0] == res[1] == res[2] == [] + assert_(res[0] == res[1] == res[2] == []) self.interpret(f, []) def test_slice(self): @@ -299,14 +299,14 @@ return t[1:] + t[:-1] + t[12:] + t[0:2] def f(n): res = g(n) - assert len(res) == 6 - assert res[0] == "hello" - assert res[1] == n - assert res[2] == 1.5 - assert res[3] == "hello" - assert res[4] == 1.5 - assert res[5] == "hello" - self.interpret(f, [9]) + assert_(len(res) == 6) + assert_(res[0] == "hello") + assert_(res[1] == n) + 
assert_(res[2] == 1.5) + assert_(res[3] == "hello") + assert_(res[4] == 1.5) + assert_(res[5] == "hello") + res = self.interpret(f, [9]) def test_tuple_eq(self): def f(n): @@ -350,8 +350,8 @@ def test_tuple_str(self): def f(n): - assert str(()) == "()" - assert str((n,)) == "(%d,)" % n - assert str((n, 6)) == "(%d, 6)" % n - assert str(((n,),)) == "((%d,),)" % n + assert_(str(()) == "()") + assert_(str((n,)) == "(%d,)" % n) + assert_(str((n, 6)) == "(%d, 6)" % n) + assert_(str(((n,),)) == "((%d,),)" % n) self.interpret(f, [3]) diff --git a/rpython/rtyper/test/test_rweakref.py b/rpython/rtyper/test/test_rweakref.py --- a/rpython/rtyper/test/test_rweakref.py +++ b/rpython/rtyper/test/test_rweakref.py @@ -1,5 +1,7 @@ -import py, weakref +import weakref + from rpython.rlib import rgc +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.rtyper.test.tool import BaseRtypingTest @@ -68,9 +70,9 @@ r = w2 return r() is not None res = self.interpret(f, [1]) - assert res == False + assert res is False res = self.interpret(f, [0]) - assert res == True + assert res is True def test_multiple_prebuilt_dead_weakrefs(self): class A: @@ -95,22 +97,22 @@ r = w1 else: r = w3 - assert r() is None + assert_(r() is None) else: if n < -5: r = w2 else: r = w4 - assert r() is not None + assert_(r() is not None) return r() is not None res = self.interpret(f, [1]) - assert res == False + assert res is False res = self.interpret(f, [0]) - assert res == True + assert res is True res = self.interpret(f, [100]) - assert res == False + assert res is False res = self.interpret(f, [-100]) - assert res == True + assert res is True def test_pbc_null_weakref(self): class A: @@ -124,12 +126,12 @@ assert self.interpret(fn, [1]) is True def test_ll_weakref(self): - S = lltype.GcStruct('S', ('x',lltype.Signed)) + S = lltype.GcStruct('S', ('x', lltype.Signed)) def g(): s = lltype.malloc(S) w = llmemory.weakref_create(s) - assert llmemory.weakref_deref(lltype.Ptr(S), w) == s - assert llmemory.weakref_deref(lltype.Ptr(S), w) == s + assert_(llmemory.weakref_deref(lltype.Ptr(S), w) == s) + assert_(llmemory.weakref_deref(lltype.Ptr(S), w) == s) return w # 's' is forgotten here def f(): w = g() @@ -152,7 +154,7 @@ def fn(i): w = g() rgc.collect() - assert w() is not None + assert_(w() is not None) return mylist[i] is None assert self.interpret(fn, [0], rweakref=False) is False From pypy.commits at gmail.com Sun Nov 5 10:09:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 07:09:43 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix asserts in test_newgc.py Message-ID: <59ff29b7.028b1c0a.476b5.69eb@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92948:fe04fd3b8632 Date: 2017-11-05 15:09 +0000 http://bitbucket.org/pypy/pypy/changeset/fe04fd3b8632/ Log: Fix asserts in test_newgc.py diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -9,7 +9,8 @@ from rpython.conftest import option from rpython.rlib import rgc -from rpython.rlib.objectmodel import keepalive_until_here, compute_hash, compute_identity_hash, r_dict +from rpython.rlib.objectmodel import ( + assert_, keepalive_until_here, compute_hash, compute_identity_hash, r_dict) from rpython.rlib.rstring import StringBuilder from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.lltypesystem.lloperation import llop @@ 
-107,7 +108,7 @@ funcstr = funcsstr[num] if funcstr: return funcstr(arg) - assert 0, 'unreachable' + assert_(0, 'unreachable') cls.funcsstr = funcsstr cls.c_allfuncs = staticmethod(cls._makefunc_str_int(allfuncs)) cls.allfuncs = staticmethod(allfuncs) @@ -516,7 +517,7 @@ if i & 1 == 0: a = A() a.index = i - assert a is not None + assert_(a is not None) weakrefs.append(weakref.ref(a)) if i % 7 == 6: keepalive.append(a) @@ -525,9 +526,9 @@ for i in range(n): a = weakrefs[i]() if i % 7 == 6: - assert a is not None + assert_(a is not None) if a is not None: - assert a.index == i & ~1 + assert_(a.index == i & ~1) else: count_free += 1 return count_free @@ -585,10 +586,10 @@ for n in range(len(dlist)): d = dlist[n] keys = keyslist[n] - assert len(d) == len(keys) + assert_(len(d) == len(keys)) i = 0 while i < len(keys): - assert d[keys[i]] == i + assert_(d[keys[i]] == i) i += 1 return 42 return fn @@ -701,13 +702,13 @@ def does_stuff(): fd = os.open(filename, os.O_WRONLY | os.O_CREAT, 0777) count = os.write(fd, "hello world\n") - assert count == len("hello world\n") + assert_(count == len("hello world\n")) os.close(fd) fd = os.open(filename, os.O_RDONLY, 0777) result = os.lseek(fd, 1, 0) - assert result == 1 + assert_(result == 1) data = os.read(fd, 500) - assert data == "ello world\n" + assert_(data == "ello world\n") os.close(fd) return 0 @@ -870,7 +871,7 @@ # ^^^ likely to trigger a collection xr = xr.prev i += 1 - assert xr is None + assert_(xr is None) def check(xr, n, step): "Check that the identity hashes are still correct." @@ -882,7 +883,7 @@ raise ValueError xr = xr.prev i += 1 - assert xr is None + assert_(xr is None) def h(n): x3 = g(3) @@ -947,7 +948,7 @@ for i in range(20): x.append((1, lltype.malloc(S))) for i in range(50): - assert l2[i] == (40 + i) * 3 + assert_(l2[i] == (40 + i) * 3) return 0 return fn @@ -965,7 +966,7 @@ rgc.ll_arraycopy(l, l2, 40, 0, 50) rgc.collect() for i in range(50): - assert l2[i] == l[40 + i] + assert_(l2[i] == l[40 + i]) return 0 return fn @@ -985,7 +986,7 @@ found = True if x == lltype.cast_opaque_ptr(llmemory.GCREF, s.u): os.write(2, "s.u should not be found!\n") - assert False + assert_(False) return found == 1 def fn(): @@ -994,7 +995,7 @@ found = g(s) if not found: os.write(2, "not found!\n") - assert False + assert_(False) s.u.x = 42 return 0 @@ -1013,8 +1014,8 @@ gcref1 = lltype.cast_opaque_ptr(llmemory.GCREF, s) gcref2 = lltype.cast_opaque_ptr(llmemory.GCREF, s.u) lst = rgc.get_rpy_referents(gcref1) - assert gcref2 in lst - assert gcref1 not in lst + assert_(gcref2 in lst) + assert_(gcref1 not in lst) s.u.x = 42 return 0 @@ -1030,7 +1031,7 @@ def check(gcref, expected): result = rgc._is_rpy_instance(gcref) - assert result == expected + assert_(result == expected) def fn(): s = lltype.malloc(S) @@ -1060,21 +1061,21 @@ def fn(): foo = Foo() gcref1 = rgc.cast_instance_to_gcref(foo) - assert rgc.try_cast_gcref_to_instance(Foo, gcref1) is foo - assert rgc.try_cast_gcref_to_instance(FooBar, gcref1) is None - assert rgc.try_cast_gcref_to_instance(Biz, gcref1) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, gcref1) is foo) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref1) is None) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref1) is None) foobar = FooBar() gcref2 = rgc.cast_instance_to_gcref(foobar) - assert rgc.try_cast_gcref_to_instance(Foo, gcref2) is foobar - assert rgc.try_cast_gcref_to_instance(FooBar, gcref2) is foobar - assert rgc.try_cast_gcref_to_instance(Biz, gcref2) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, 
gcref2) is foobar) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref2) is foobar) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref2) is None) s = lltype.malloc(S) gcref3 = lltype.cast_opaque_ptr(llmemory.GCREF, s) - assert rgc.try_cast_gcref_to_instance(Foo, gcref3) is None - assert rgc.try_cast_gcref_to_instance(FooBar, gcref3) is None - assert rgc.try_cast_gcref_to_instance(Biz, gcref3) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, gcref3) is None) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref3) is None) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref3) is None) return 0 @@ -1101,13 +1102,13 @@ a = lltype.malloc(A, 1000) gcref1 = lltype.cast_opaque_ptr(llmemory.GCREF, s) int1 = rgc.get_rpy_memory_usage(gcref1) - assert 8 <= int1 <= 32 + assert_(8 <= int1 <= 32) gcref2 = lltype.cast_opaque_ptr(llmemory.GCREF, s.u) int2 = rgc.get_rpy_memory_usage(gcref2) - assert 4 * 9 <= int2 <= 8 * 12 + assert_(4 * 9 <= int2 <= 8 * 12) gcref3 = lltype.cast_opaque_ptr(llmemory.GCREF, a) int3 = rgc.get_rpy_memory_usage(gcref3) - assert 4 * 1001 <= int3 <= 8 * 1010 + assert_(4 * 1001 <= int3 <= 8 * 1010) return 0 return fn @@ -1133,10 +1134,10 @@ int3 = rgc.get_rpy_type_index(gcref3) gcref4 = lltype.cast_opaque_ptr(llmemory.GCREF, s2) int4 = rgc.get_rpy_type_index(gcref4) - assert int1 != int2 - assert int1 != int3 - assert int2 != int3 - assert int1 == int4 + assert_(int1 != int2) + assert_(int1 != int3) + assert_(int2 != int3) + assert_(int1 == int4) return 0 return fn @@ -1216,8 +1217,8 @@ os.close(fd) # a = rgc.get_typeids_list() - assert len(a) > 1 - assert 0 < rffi.cast(lltype.Signed, a[1]) < 10000 + assert_(len(a) > 1) + assert_(0 < rffi.cast(lltype.Signed, a[1]) < 10000) return 0 return fn @@ -1240,20 +1241,20 @@ a2 = A() if not rgc.has_gcflag_extra(): return 0 # cannot test it then - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == False) rgc.toggle_gcflag_extra(a1) - assert rgc.get_gcflag_extra(a1) == True - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == True) + assert_(rgc.get_gcflag_extra(a2) == False) rgc.toggle_gcflag_extra(a2) - assert rgc.get_gcflag_extra(a1) == True - assert rgc.get_gcflag_extra(a2) == True + assert_(rgc.get_gcflag_extra(a1) == True) + assert_(rgc.get_gcflag_extra(a2) == True) rgc.toggle_gcflag_extra(a1) - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == True + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == True) rgc.toggle_gcflag_extra(a2) - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == False) return 0 return fn @@ -1271,11 +1272,11 @@ def fn(): s = lltype.malloc(S, zero=True) - assert s.x == 0 + assert_(s.x == 0) s2 = lltype.malloc(S2, zero=True) - assert s2.parent.x == 0 + assert_(s2.parent.x == 0) a = lltype.malloc(A, 3, zero=True) - assert a[2] == 0 + assert_(a[2] == 0) # XXX not supported right now in gctransform/framework.py: #b = lltype.malloc(B, 3, zero=True) #assert len(b.y) == 3 @@ -1307,7 +1308,7 @@ def test_long_chain_of_instances(self): res = self.run("long_chain_of_instances") assert res == 1500 - + class TestSemiSpaceGC(UsingFrameworkTest, snippet.SemiSpaceGCTestDefines): gcpolicy = "semispace" @@ -1475,7 +1476,7 @@ def define_nursery_hash_base(cls): from rpython.rlib.debug import debug_print - + class 
A: pass def fn(): @@ -1492,7 +1493,7 @@ debug_print("objects", len(objects)) for i in range(len(objects)): debug_print(i) - assert compute_identity_hash(objects[i]) == hashes[i] + assert_(compute_identity_hash(objects[i]) == hashes[i]) debug_print("storing in dict") unique[hashes[i]] = None debug_print("done") @@ -1528,10 +1529,10 @@ def check(lst): hashes = [] for i, (s, a) in enumerate(lst): - assert a.x == i + assert_(a.x == i) rgc.ll_write_final_null_char(s) for i, (s, a) in enumerate(lst): - assert a.x == i # check it was not overwritten + assert_(a.x == i) # check it was not overwritten def fn(): check(prebuilt) lst1 = [] @@ -1733,7 +1734,7 @@ assert popen.wait() in (-6, 134) # aborted # note: it seems that on some systems we get 134 and on # others we get -6. Bash is supposed to translate the - # SIGABRT (signal 6) from the subprocess into the exit + # SIGABRT (signal 6) from the subprocess into the exit # code 128+6, but I guess it may not always do so. assert 'out of memory:' in child_stderr return '42' From pypy.commits at gmail.com Sun Nov 5 10:28:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 07:28:07 -0800 (PST) Subject: [pypy-commit] pypy default: kill test that has been disabled for 6 years Message-ID: <59ff2e07.90051c0a.ada36.94b2@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92949:50ba491d0e92 Date: 2017-11-05 15:27 +0000 http://bitbucket.org/pypy/pypy/changeset/50ba491d0e92/ Log: kill test that has been disabled for 6 years diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -82,46 +82,5 @@ assert res == 1 self.check_resops(call_r=1) # for the case B(), but not for the case A() - def test_keepalive(self): - py.test.skip("XXX fails") # hum, I think the test itself is broken - # - mydriver = JitDriver(reds = ['n', 'states'], greens = []) - class State: - num = 1 - class X: - def __init__(self, state): - self.state = state - def __del__(self): - self.state.num += 1 - @dont_look_inside - def do_stuff(): - pass - def f(n): - states = [] - while n > 0: - mydriver.jit_merge_point(n=n, states=states) - state = State() - states.append(state) - x = X(state) - do_stuff() - state.num *= 1000 - do_stuff() - keepalive_until_here(x) - n -= 1 - return states - def main(n): - states = f(n) - rgc.collect() - rgc.collect() - err = 1001 - for state in states: - if state.num != 1001: - err = state.num - print 'ERROR:', err - return err - assert main(20) == 1001 - res = self.meta_interp(main, [20]) - assert res == 1001 - class TestLLtype(DelTests, LLJitMixin): pass From pypy.commits at gmail.com Sun Nov 5 14:28:05 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 05 Nov 2017 11:28:05 -0800 (PST) Subject: [pypy-commit] pypy default: add method used in matplotlib Message-ID: <59ff6645.07d81c0a.c75d9.0cb1@mx.google.com> Author: Matti Picus Branch: Changeset: r92950:bab05da3f317 Date: 2017-11-05 21:17 +0200 http://bitbucket.org/pypy/pypy/changeset/bab05da3f317/ Log: add method used in matplotlib diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. 
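The interpaddr() method added above exposes the address of the underlying Tcl interpreter as a plain integer; that is what a consumer such as matplotlib's TkAgg backend reads through the Tkinter wrapper and hands on to its C extension. A minimal Python 2 usage sketch, assuming only the standard Tkinter wrapper around this _tkinter app object (illustrative only, nothing below is taken from the changeset except the method name):

    import Tkinter                       # Python 2 wrapper over _tkinter

    root = Tkinter.Tk()                  # needs a Tk installation and a display
    tkapp = root.tk                      # the _tkinter application object
    addr = tkapp.interpaddr()            # integer address of the Tcl interpreter
    assert isinstance(addr, (int, long))

On this implementation the returned value is simply int(tkffi.cast('size_t', self.interp)), as the diff shows, so it can be passed wherever the address from CPython's _tkinter would be used.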
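The assert-rewrite changesets in this digest replace bare assert statements inside the interpreted test functions with calls to assert_ imported from rpython.rlib.objectmodel. The helper's own definition is not shown in any of these diffs; judging only from the call sites that do appear (a condition, optionally followed by a message, e.g. assert_(x == -42) and assert_(0, "should have raised KeyError")), it presumably behaves like the following sketch, which is a hypothetical stand-in rather than the branch's actual code:

    def assert_(condition, msg="assertion failed"):
        # Hypothetical stand-in for rpython.rlib.objectmodel.assert_ as used
        # in the surrounding diffs: raise the same way a failing assert would.
        if not condition:
            raise AssertionError(msg)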
From pypy.commits at gmail.com Sun Nov 5 15:50:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 12:50:00 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: more assert fixes Message-ID: <59ff7978.03a7df0a.ebb7f.4749@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92951:7318052f560c Date: 2017-11-05 20:49 +0000 http://bitbucket.org/pypy/pypy/changeset/7318052f560c/ Log: more assert fixes diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -3,6 +3,7 @@ import py import weakref +from rpython.rlib.objectmodel import assert_ from rpython.rlib import rgc from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.jit.metainterp import history @@ -121,7 +122,7 @@ res += ovfcheck(x * x) y -= 1 except OverflowError: - assert 0 + assert_(0) return res res = self.meta_interp(f, [6, 7]) assert res == 1323 @@ -157,7 +158,7 @@ try: res += ovfcheck(x * x) + b except OverflowError: - assert 0 + assert_(0) y -= 1 return res res = self.meta_interp(f, [6, 7]) @@ -793,7 +794,7 @@ return llop.int_between(lltype.Bool, arg1, arg2, arg3) """ % locals()).compile() in loc res = self.interp_operations(loc['f'], [5, 6, 7]) - assert res == expect_result + assert_(res == expect_result) self.check_operations_history(expect_operations) # check('n', 'm', 'p', True, int_sub=2, uint_lt=1) @@ -997,7 +998,7 @@ while i < 10: myjitdriver.can_enter_jit(i=i, t=t) myjitdriver.jit_merge_point(i=i, t=t) - assert i > 0 + assert_(i > 0) t += int_c_div(100, i) - int_c_mod(100, i) i += 1 return t @@ -1220,7 +1221,7 @@ # to the backend at all: ZeroDivisionError # def f(n): - assert n >= 0 + assert_(n >= 0) try: return ovfcheck(5 % n) except ZeroDivisionError: @@ -1231,7 +1232,7 @@ assert res == -666 # def f(n): - assert n >= 0 + assert_(n >= 0) try: return ovfcheck(6 // n) except ZeroDivisionError: @@ -1350,7 +1351,7 @@ else: obj = A() obj.a = 17 - assert isinstance(obj, B) + assert_(isinstance(obj, B)) return obj.a res = self.interp_operations(fn, [1]) assert res == 1 @@ -1922,8 +1923,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [6, 7]) assert res == 6*8 + 6**8 @@ -1966,8 +1967,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [6, 20]) assert res == g(6, 20) @@ -2001,16 +2002,16 @@ def g(x, y): a1 = f(A(x), y, A(x)) a2 = f(A(x), y, A(x)) - assert a1.val == a2.val + assert_(a1.val == a2.val) b1 = f(B(x), y, B(x)) b2 = f(B(x), y, B(x)) - assert b1.val == b2.val + assert_(b1.val == b2.val) c1 = f(B(x), y, A(x)) c2 = f(B(x), y, A(x)) - assert c1.val == c2.val + assert_(c1.val == c2.val) d1 = f(A(x), y, B(x)) d2 = f(A(x), y, B(x)) - assert d1.val == d2.val + assert_(d1.val == d2.val) return a1.val + b1.val + c1.val + d1.val res = self.meta_interp(g, [3, 14]) assert res == g(3, 14) @@ -2041,7 +2042,7 @@ def g(x, y): c1 = f(A(x), y, B(x)) c2 = f(A(x), y, B(x)) - assert c1.val == c2.val + assert_(c1.val == c2.val) return c1.val res = self.meta_interp(g, [3, 16]) assert res == g(3, 16) @@ -2068,7 +2069,7 @@ def g(x, y): a1 = f(A(x), y, A(x)) a2 = f(A(x), y, A(x)) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val 
res = self.meta_interp(g, [3, 14]) assert res == g(3, 14) @@ -2093,7 +2094,7 @@ def g(x, y): a1 = f(A(x), y) a2 = f(A(x), y) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val res = self.meta_interp(g, [6, 14]) assert res == g(6, 14) @@ -2120,7 +2121,7 @@ def g(x, y): a1 = f(A(x), y) a2 = f(A(x), y) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val res = self.meta_interp(g, [6, 14]) assert res == g(6, 14) @@ -2156,8 +2157,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [3, 23]) assert res == 7068153 @@ -2730,7 +2731,7 @@ try: sa += ovfcheck(i + i) except OverflowError: - assert 0 + assert_(0) node1 = A(i) i += 1 assert self.meta_interp(f, [20, 7]) == f(20, 7) @@ -2762,7 +2763,7 @@ sa += 1 else: sa += 2 - assert -100 < i < 100 + assert_(-100 < i < 100) i += 1 return sa assert self.meta_interp(f, [20]) == f(20) @@ -2783,7 +2784,7 @@ sa += 1 else: sa += 2 - assert -100 <= node.val <= 100 + assert_(-100 <= node.val <= 100) i += 1 return sa assert self.meta_interp(f, [20]) == f(20) @@ -3863,13 +3864,13 @@ def f(x): a = make(x) if x > 0: - assert isinstance(a, A) + assert_(isinstance(a, A)) z = a.f() elif x < 0: - assert isinstance(a, B) + assert_(isinstance(a, B)) z = a.f() else: - assert isinstance(a, C) + assert_(isinstance(a, C)) z = a.f() return z + a.g() res1 = f(6) @@ -4285,7 +4286,7 @@ return x > x or x > x if cmp == 'ge': return x >= x and x >= x - assert 0 + assert_(0) return f def make_str(cmp): @@ -4295,7 +4296,7 @@ return x is x or x is x if cmp == 'ne': return x is not x and x is not x - assert 0 + assert_(0) return f def make_object(cmp): @@ -4307,7 +4308,7 @@ return x is x if cmp == 'ne': return x is not x - assert 0 + assert_(0) return f for cmp in 'eq ne lt le gt ge'.split(): diff --git a/rpython/jit/metainterp/test/test_bytearray.py b/rpython/jit/metainterp/test/test_bytearray.py --- a/rpython/jit/metainterp/test/test_bytearray.py +++ b/rpython/jit/metainterp/test/test_bytearray.py @@ -1,13 +1,14 @@ import py +from rpython.rlib.objectmodel import assert_ from rpython.jit.metainterp.test.support import LLJitMixin -from rpython.rlib.jit import JitDriver, dont_look_inside +from rpython.rlib.jit import dont_look_inside class TestByteArray(LLJitMixin): def test_getitem(self): x = bytearray("foobar") def fn(n): - assert n >= 0 + assert_(n >= 0) return x[n] res = self.interp_operations(fn, [3]) assert res == ord('b') @@ -31,7 +32,7 @@ def make_me(): return bytearray("foobar") def fn(n): - assert n >= 0 + assert_(n >= 0) x = make_me() x[n] = 3 return x[3] + 1000 * x[4] diff --git a/rpython/jit/metainterp/test/test_call.py b/rpython/jit/metainterp/test/test_call.py --- a/rpython/jit/metainterp/test/test_call.py +++ b/rpython/jit/metainterp/test/test_call.py @@ -1,4 +1,4 @@ - +from rpython.rlib.objectmodel import assert_ from rpython.jit.metainterp.test.support import LLJitMixin, noConst from rpython.rlib import jit @@ -146,8 +146,8 @@ while n > 0: myjitdriver.can_enter_jit(n=n, p=p, m=m) myjitdriver.jit_merge_point(n=n, p=p, m=m) - assert p > -1 - assert p < 1 + assert_(p > -1) + assert_(p < 1) n -= jit.conditional_call_elidable(p, externfn, n) return n res = self.meta_interp(f, [21, 5, 0]) @@ -165,8 +165,8 @@ while n > 0: myjitdriver.can_enter_jit(n=n, p=p, m=m) myjitdriver.jit_merge_point(n=n, p=p, m=m) - assert p > -1 - assert p < 1 + assert_(p > -1) + assert_(p < 1) n0 = n n -= 
jit.conditional_call_elidable(p, externfn, n0) n -= jit.conditional_call_elidable(p, externfn, n0) diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -1,6 +1,6 @@ import py from rpython.rlib.jit import JitDriver, dont_look_inside -from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.objectmodel import keepalive_until_here, assert_ from rpython.rlib import rgc from rpython.jit.metainterp.test.support import LLJitMixin @@ -30,7 +30,7 @@ 'jump': 1}) def test_class_of_allocated(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) class Foo: def __del__(self): pass @@ -49,16 +49,15 @@ myjitdriver.jit_merge_point(x=x, n=n) x = X() y = Y() - assert x.f() == 456 - assert y.f() == 123 + assert_(x.f() == 456) + assert_(y.f() == 123) n -= 1 return 42 res = self.meta_interp(f, [20]) assert res == 42 def test_instantiate_with_or_without_del(self): - import gc - mydriver = JitDriver(reds = ['n', 'x'], greens = []) + mydriver = JitDriver(reds=['n', 'x'], greens=[]) class Base: pass class A(Base): foo = 72 class B(Base): diff --git a/rpython/jit/metainterp/test/test_dict.py b/rpython/jit/metainterp/test/test_dict.py --- a/rpython/jit/metainterp/test/test_dict.py +++ b/rpython/jit/metainterp/test/test_dict.py @@ -1,8 +1,8 @@ +from collections import OrderedDict import py +from rpython.rlib.objectmodel import assert_, r_dict, compute_hash from rpython.jit.metainterp.test.support import LLJitMixin from rpython.rlib.jit import JitDriver -from rpython.rlib import objectmodel -from collections import OrderedDict class DictTests: @staticmethod @@ -104,7 +104,7 @@ return (x & 1) == (y & 1) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n while total: myjitdriver.jit_merge_point(total=total, dct=dct) @@ -145,7 +145,7 @@ return (x & 1) == (y & 1) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n while total: myjitdriver.jit_merge_point(total=total, dct=dct) @@ -169,13 +169,13 @@ def eq_func(a, b): return a.value == b.value def hash_func(x): - return objectmodel.compute_hash(x.value) + return compute_hash(x.value) def f(n): d = None while n > 0: myjitdriver.jit_merge_point(n=n, d=d) - d = objectmodel.r_dict(eq_func, hash_func) + d = r_dict(eq_func, hash_func) y = Wrapper(str(n)) d[y] = n - 1 n = d[y] @@ -331,7 +331,7 @@ return (x % 2) == (y % 2) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n x = 44444 y = 55555 @@ -398,7 +398,7 @@ d[2] = 6 d[1] = 4 lst = d.items() - assert len(lst) == 4 + assert_(len(lst) == 4) return ( lst[0][0] + 10*lst[0][1] + 100*lst[1][0] + 1000*lst[1][1] + 10000*lst[3][0] + 100000*lst[2][1] + diff --git a/rpython/jit/metainterp/test/test_exception.py b/rpython/jit/metainterp/test/test_exception.py --- a/rpython/jit/metainterp/test/test_exception.py +++ b/rpython/jit/metainterp/test/test_exception.py @@ -2,7 +2,7 @@ from rpython.jit.metainterp.test.support import LLJitMixin from rpython.rlib.jit import JitDriver, dont_look_inside from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask -from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.objectmodel import keepalive_until_here, assert_ from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.rtyper.lltypesystem import lltype, rffi @@ -633,11 +633,11 @@ try: rescall(i) except 
KeyError: - assert i < 10 + assert_(i < 10) except ValueError: - assert i >= 20 + assert_(i >= 20) else: - assert 10 <= i < 20 + assert_(10 <= i < 20) i += 1 return i res = self.meta_interp(f, [0], inline=True) diff --git a/rpython/jit/metainterp/test/test_fficall.py b/rpython/jit/metainterp/test/test_fficall.py --- a/rpython/jit/metainterp/test/test_fficall.py +++ b/rpython/jit/metainterp/test/test_fficall.py @@ -6,6 +6,7 @@ from rpython.rtyper.annlowlevel import llhelper from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.longlong import is_longlong, is_64_bit +from rpython.rlib.objectmodel import assert_ from rpython.rlib import jit from rpython.rlib import jit_libffi from rpython.rlib.jit_libffi import (types, CIF_DESCRIPTION, FFI_TYPE_PP, @@ -31,11 +32,11 @@ Context manager to monkey patch jit_libffi with our custom "libffi-like" function """ - + def __init__(self, fake_call_impl_any): self.fake_call_impl_any = fake_call_impl_any self.monkey = monkeypatch() - + def __enter__(self, *args): self.monkey.setattr(jit_libffi, 'jit_ffi_call_impl_any', self.fake_call_impl_any) @@ -61,7 +62,7 @@ if (lltype.typeOf(exp_a) == rffi.ULONG and lltype.typeOf(a) == lltype.Signed): a = rffi.cast(rffi.ULONG, a) - assert a == exp_a + assert_(a == exp_a) return rvalue FUNC = lltype.FuncType([lltype.typeOf(avalue) for avalue in avalues], lltype.typeOf(rvalue)) @@ -88,7 +89,7 @@ lltype.typeOf(avalue) is rffi.UCHAR): got = intmask(got) avalue = intmask(avalue) - assert got == avalue + assert_(got == avalue) ofs += 16 write_to_ofs = 0 if rvalue is not None: @@ -312,7 +313,7 @@ # call_release_gil was simply lost and when guard_not_forced # failed, and the value of "res" was unpredictable. # See commit b84ff38f34bd and subsequents. - assert res == n*2 + assert_(res == n*2) jit.virtual_ref_finish(vref, xy) exctx.topframeref = jit.vref_None n += 1 @@ -322,7 +323,7 @@ assert f() == 100 res = self.meta_interp(f, []) assert res == 100 - + class TestFfiCall(FfiCallTests, LLJitMixin): def test_jit_ffi_vref(self): @@ -349,7 +350,7 @@ # jit_ffi_prep_cif(cd) # - assert rffi.sizeof(rffi.DOUBLE) == 8 + assert_(rffi.sizeof(rffi.DOUBLE) == 8) exb = lltype.malloc(rffi.DOUBLEP.TO, 8, flavor='raw') exb[2] = 1.23 jit_ffi_call(cd, math_sin, rffi.cast(rffi.CCHARP, exb)) diff --git a/rpython/jit/metainterp/test/test_jitiface.py b/rpython/jit/metainterp/test/test_jitiface.py --- a/rpython/jit/metainterp/test/test_jitiface.py +++ b/rpython/jit/metainterp/test/test_jitiface.py @@ -1,19 +1,18 @@ -import py -from rpython.rlib.jit import JitDriver, JitHookInterface, Counters, dont_look_inside +from rpython.rlib.objectmodel import assert_ +from rpython.rlib.jit import ( + JitDriver, JitHookInterface, Counters, dont_look_inside) from rpython.rlib import jit_hooks from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.policy import JitPolicy -from rpython.jit.metainterp.resoperation import rop -from rpython.rtyper.annlowlevel import hlstr, cast_instance_to_gcref +from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.jit.metainterp.jitprof import Profiler, EmptyProfiler -from rpython.jit.codewriter.policy import JitPolicy class JitHookInterfaceTests(object): # !!!note!!! - don't subclass this from the backend. 
Subclass the LL # class later instead - + def test_abort_quasi_immut(self): reasons = [] @@ -71,7 +70,7 @@ iface = MyJitIface() - driver = JitDriver(greens = ['n', 'm'], reds = ['i']) + driver = JitDriver(greens=['n', 'm'], reds=['i']) def loop(n, m): i = 0 @@ -94,7 +93,7 @@ def test_on_compile_bridge(self): called = [] - + class MyJitIface(JitHookInterface): def after_compile(self, di): called.append("compile") @@ -104,8 +103,8 @@ def before_compile_bridge(self, di): called.append("before_compile_bridge") - - driver = JitDriver(greens = ['n', 'm'], reds = ['i']) + + driver = JitDriver(greens=['n', 'm'], reds=['i']) def loop(n, m): i = 0 @@ -120,7 +119,7 @@ assert called == ["compile", "before_compile_bridge", "compile_bridge"] def test_get_stats(self): - driver = JitDriver(greens = [], reds = ['i', 's']) + driver = JitDriver(greens=[], reds=['i', 's']) def loop(i): s = 0 @@ -134,31 +133,33 @@ def main(): loop(30) - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_LOOPS) == 1 - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_BRIDGES) == 1 - assert jit_hooks.stats_get_counter_value(None, - Counters.TRACING) == 2 - assert jit_hooks.stats_get_times_value(None, Counters.TRACING) >= 0 + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_LOOPS) == 1) + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_BRIDGES) == 1) + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TRACING) == 2) + assert_(jit_hooks.stats_get_times_value( + None, Counters.TRACING) >= 0) self.meta_interp(main, [], ProfilerClass=Profiler) def test_get_stats_empty(self): - driver = JitDriver(greens = [], reds = ['i']) + driver = JitDriver(greens=[], reds=['i']) def loop(i): while i > 0: driver.jit_merge_point(i=i) i -= 1 def main(): loop(30) - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_LOOPS) == 0 - assert jit_hooks.stats_get_times_value(None, Counters.TRACING) == 0 + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_LOOPS) == 0) + assert_(jit_hooks.stats_get_times_value( + None, Counters.TRACING) == 0) self.meta_interp(main, [], ProfilerClass=EmptyProfiler) def test_get_jitcell_at_key(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name='jit') + driver = JitDriver(greens=['s'], reds=['i'], name='jit') def loop(i, s): while i > s: @@ -167,17 +168,17 @@ def main(s): loop(30, s) - assert jit_hooks.get_jitcell_at_key("jit", s) - assert not jit_hooks.get_jitcell_at_key("jit", s + 1) + assert_(jit_hooks.get_jitcell_at_key("jit", s)) + assert_(not jit_hooks.get_jitcell_at_key("jit", s + 1)) jit_hooks.trace_next_iteration("jit", s + 1) loop(s + 3, s + 1) - assert jit_hooks.get_jitcell_at_key("jit", s + 1) + assert_(jit_hooks.get_jitcell_at_key("jit", s + 1)) self.meta_interp(main, [5]) self.check_jitcell_token_count(2) def test_get_jitcell_at_key_ptr(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name='jit') + driver = JitDriver(greens=['s'], reds=['i'], name='jit') class Green(object): pass @@ -193,17 +194,17 @@ g1_ptr = cast_instance_to_gcref(g1) g2_ptr = cast_instance_to_gcref(g2) loop(10, g1) - assert jit_hooks.get_jitcell_at_key("jit", g1_ptr) - assert not jit_hooks.get_jitcell_at_key("jit", g2_ptr) + assert_(jit_hooks.get_jitcell_at_key("jit", g1_ptr)) + assert_(not jit_hooks.get_jitcell_at_key("jit", g2_ptr)) jit_hooks.trace_next_iteration("jit", g2_ptr) loop(2, g2) - assert jit_hooks.get_jitcell_at_key("jit", g2_ptr) + 
assert_(jit_hooks.get_jitcell_at_key("jit", g2_ptr)) self.meta_interp(main, [5]) self.check_jitcell_token_count(2) def test_dont_trace_here(self): - driver = JitDriver(greens = ['s'], reds = ['i', 'k'], name='jit') + driver = JitDriver(greens=['s'], reds=['i', 'k'], name='jit') def loop(i, s): k = 4 @@ -228,10 +229,10 @@ self.check_resops(call_assembler_n=8) def test_trace_next_iteration_hash(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name="name") + driver = JitDriver(greens=['s'], reds=['i'], name="name") class Hashes(object): check = False - + def __init__(self): self.l = [] self.t = [] @@ -281,9 +282,9 @@ class LLJitHookInterfaceTests(JitHookInterfaceTests): # use this for any backend, instead of the super class - + def test_ll_get_stats(self): - driver = JitDriver(greens = [], reds = ['i', 's']) + driver = JitDriver(greens=[], reds=['i', 's']) def loop(i): s = 0 @@ -292,7 +293,7 @@ if i % 2: s += 1 i -= 1 - s+= 2 + s += 2 return s def main(b): @@ -300,27 +301,27 @@ loop(30) l = jit_hooks.stats_get_loop_run_times(None) if b: - assert len(l) == 4 + assert_(len(l) == 4) # completely specific test that would fail each time # we change anything major. for now it's 4 # (loop, bridge, 2 entry points) - assert l[0].type == 'e' - assert l[0].number == 0 - assert l[0].counter == 4 - assert l[1].type == 'l' - assert l[1].counter == 4 - assert l[2].type == 'l' - assert l[2].counter == 23 - assert l[3].type == 'b' - assert l[3].number == 4 - assert l[3].counter == 11 + assert_(l[0].type == 'e') + assert_(l[0].number == 0) + assert_(l[0].counter == 4) + assert_(l[1].type == 'l') + assert_(l[1].counter == 4) + assert_(l[2].type == 'l') + assert_(l[2].counter == 23) + assert_(l[3].type == 'b') + assert_(l[3].number == 4) + assert_(l[3].counter == 11) else: - assert len(l) == 0 + assert_(len(l) == 0) self.meta_interp(main, [True], ProfilerClass=Profiler) # this so far does not work because of the way setup_once is done, # but fine, it's only about untranslated version anyway #self.meta_interp(main, [False], ProfilerClass=Profiler) - + class TestJitHookInterface(JitHookInterfaceTests, LLJitMixin): pass diff --git a/rpython/jit/metainterp/test/test_loop.py b/rpython/jit/metainterp/test/test_loop.py --- a/rpython/jit/metainterp/test/test_loop.py +++ b/rpython/jit/metainterp/test/test_loop.py @@ -1,19 +1,20 @@ import py -from rpython.rlib.jit import JitDriver, hint, set_param, dont_look_inside,\ - elidable -from rpython.rlib.objectmodel import compute_hash +from rpython.rlib.jit import ( + JitDriver, set_param, dont_look_inside, elidable) +from rpython.rlib.objectmodel import compute_hash, assert_ +from rpython.rlib.rerased import new_erasing_pair +from rpython.rtyper.lltypesystem import lltype + from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.policy import StopAtXPolicy -from rpython.jit.metainterp.resoperation import rop -from rpython.jit.metainterp import history class LoopTest(object): enable_opts = '' automatic_promotion_result = { - 'int_add' : 6, 'int_gt' : 1, 'guard_false' : 1, 'jump' : 1, - 'guard_value' : 3 + 'int_add': 6, 'int_gt': 1, 'guard_false': 1, 'jump': 1, + 'guard_value': 3 } def meta_interp(self, f, args, policy=None, backendopt=False): @@ -26,7 +27,8 @@ return f(*args) def test_simple_loop(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res']) + def f(x, y): res = 0 while y > 0: @@ -40,7 
+42,8 @@ self.check_trace_count(1) def test_loop_with_delayed_setfield(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res', 'a']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res', 'a']) + class A(object): def __init__(self): self.x = 3 @@ -67,7 +70,7 @@ def test_loop_with_two_paths(self): from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res']) def l(y, x, t): llop.debug_print(lltype.Void, y, x, t) @@ -96,7 +99,7 @@ self.check_trace_count(2) def test_alternating_loops(self): - myjitdriver = JitDriver(greens = [], reds = ['pattern']) + myjitdriver = JitDriver(greens=[], reds=['pattern']) def f(pattern): while pattern > 0: myjitdriver.can_enter_jit(pattern=pattern) @@ -114,7 +117,7 @@ self.check_trace_count(2) def test_interp_simple(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "bedca" def f(x, y): i = 0 @@ -139,7 +142,7 @@ self.check_trace_count(0) def test_green_prevents_loop(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "+--+++++----" def f(x, y): i = 0 @@ -158,7 +161,7 @@ self.check_trace_count(0) def test_interp_single_loop(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "abcd" def f(x, y): i = 0 @@ -201,7 +204,7 @@ assert found == 1 def test_interp_many_paths(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'node']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'node']) NODE = self._get_NODE() bytecode = "xxxxxxxb" def f(node): @@ -240,7 +243,7 @@ oldlimit = sys.getrecursionlimit() try: sys.setrecursionlimit(10000) - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'node']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'node']) NODE = self._get_NODE() bytecode = "xxxxxxxb" @@ -281,7 +284,7 @@ sys.setrecursionlimit(oldlimit) def test_nested_loops(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "abc= 0 + assert_(x >= 0) i = 0 while i < len(bytecode): myjitdriver.jit_merge_point(i=i, x=x) @@ -590,7 +593,7 @@ assert res == expected def test_unused_loop_constant(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'z']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'z']) def f(x, y, z): while z > 0: myjitdriver.can_enter_jit(x=x, y=y, z=z) @@ -603,7 +606,7 @@ assert res == expected def test_loop_unicode(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) def f(n): x = u'' while n > 13: @@ -617,7 +620,7 @@ assert res == expected def test_loop_string(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) def f(n): x = '' while n > 13: @@ -632,7 +635,7 @@ assert res == expected def test_adapt_bridge_to_merge_point(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'z']) + myjitdriver = JitDriver(greens=[], reds=['x', 'z']) class Z(object): def __init__(self, elem): @@ -812,7 +815,7 @@ self.check_trace_count(2) def test_path_with_operations_not_from_start(self): - jitdriver = JitDriver(greens = ['k'], reds = ['n', 'z']) + jitdriver = JitDriver(greens=['k'], reds=['n', 
'z']) def f(n): k = 0 @@ -831,11 +834,11 @@ n -= 1 return 42 - res = self.meta_interp(f, [200]) + self.meta_interp(f, [200]) def test_path_with_operations_not_from_start_2(self): - jitdriver = JitDriver(greens = ['k'], reds = ['n', 'z', 'stuff']) + jitdriver = JitDriver(greens=['k'], reds=['n', 'z', 'stuff']) class Stuff(object): def __init__(self, n): @@ -869,7 +872,8 @@ BASE = lltype.GcStruct('BASE') A = lltype.GcStruct('A', ('parent', BASE), ('val', lltype.Signed)) B = lltype.GcStruct('B', ('parent', BASE), ('charval', lltype.Char)) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p']) + myjitdriver = JitDriver(greens=[], reds=['n', 'm', 'i', 'j', 'sa', 'p']) + def f(n, m, j): i = sa = 0 pa = lltype.malloc(A) @@ -888,22 +892,22 @@ pb = lltype.cast_pointer(lltype.Ptr(B), p) sa += ord(pb.charval) sa += 100 - assert n>0 and m>0 + assert_(n > 0 and m > 0) i += j return sa # This is detected as invalid by the codewriter, for now py.test.raises(NotImplementedError, self.meta_interp, f, [20, 10, 1]) def test_unerased_pointers_in_short_preamble(self): - from rpython.rlib.rerased import new_erasing_pair - from rpython.rtyper.lltypesystem import lltype class A(object): def __init__(self, val): self.val = val erase_A, unerase_A = new_erasing_pair('A') erase_TP, unerase_TP = new_erasing_pair('TP') TP = lltype.GcArray(lltype.Signed) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p']) + myjitdriver = JitDriver( + greens=[], reds=['n', 'm', 'i', 'j', 'sa', 'p']) + def f(n, m, j): i = sa = 0 p = erase_A(A(7)) @@ -918,14 +922,13 @@ else: sa += unerase_TP(p)[0] sa += A(i).val - assert n>0 and m>0 + assert_(n > 0 and m > 0) i += j return sa res = self.meta_interp(f, [20, 10, 1]) assert res == f(20, 10, 1) def test_boxed_unerased_pointers_in_short_preamble(self): - from rpython.rlib.rerased import new_erasing_pair from rpython.rtyper.lltypesystem import lltype class A(object): def __init__(self, val): @@ -940,7 +943,7 @@ erase_A, unerase_A = new_erasing_pair('A') erase_TP, unerase_TP = new_erasing_pair('TP') TP = lltype.GcArray(lltype.Signed) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'sa', 'p']) + myjitdriver = JitDriver(greens=[], reds=['n', 'm', 'i', 'sa', 'p']) def f(n, m): i = sa = 0 p = Box(erase_A(A(7))) @@ -1011,7 +1014,6 @@ class C(object): pass - from rpython.rlib.rerased import new_erasing_pair b_erase, b_unerase = new_erasing_pair("B") c_erase, c_unerase = new_erasing_pair("C") @@ -1044,7 +1046,6 @@ def test_unroll_issue_3(self): py.test.skip("decide") - from rpython.rlib.rerased import new_erasing_pair b_erase, b_unerase = new_erasing_pair("B") # list of ints c_erase, c_unerase = new_erasing_pair("C") # list of Nones @@ -1075,7 +1076,7 @@ assert res == 420 def test_not_too_many_bridges(self): - jitdriver = JitDriver(greens = [], reds = 'auto') + jitdriver = JitDriver(greens=[], reds='auto') def f(i): s = 0 @@ -1097,7 +1098,7 @@ def test_sharing_guards(self): py.test.skip("unimplemented") - driver = JitDriver(greens = [], reds = 'auto') + driver = JitDriver(greens=[], reds='auto') def f(i): s = 0 @@ -1145,7 +1146,7 @@ v = reverse(W_Cons(pc + 1, W_Cons(pc + 2, W_Cons(pc + 3, W_Cons(pc + 4, W_Nil()))))) pc = pc + 1 repetitions += 1 - + self.meta_interp(entry_point, []) From pypy.commits at gmail.com Mon Nov 6 05:40:35 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:35 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: update the src/shared files to vmprof==0.4.10 Message-ID: 
<5a003c23.035d1c0a.21855.51b6@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92954:e4158aeecc04 Date: 2017-11-06 11:39 +0100 http://bitbucket.org/pypy/pypy/changeset/e4158aeecc04/ Log: update the src/shared files to vmprof==0.4.10 diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); + return 0; +} int vmp_write_all(const char 
*buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); From pypy.commits at gmail.com Mon Nov 6 05:40:31 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:31 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: check also the subdirectories Message-ID: <5a003c1f.52bf1c0a.ebca9.b65c@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92952:2b6ce63316a3 Date: 2017-11-06 11:35 +0100 http://bitbucket.org/pypy/pypy/changeset/2b6ce63316a3/ Log: check also the subdirectories diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -5,19 +5,25 @@ RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): shared = RVMPROF.join('src', 'shared') - files = shared.listdir('*.[ch]') + files = get_list_of_files(shared) assert files, 'cannot find any C file, probably the directory is wrong?' no_matches = [] print for file in files: - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file.basename) + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) source = urllib2.urlopen(url).read() dest = file.read() shortname = file.relto(RVMPROF) From pypy.commits at gmail.com Mon Nov 6 05:40:33 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:33 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: add a comment Message-ID: <5a003c21.831d1c0a.97c7a.9051@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92953:7fb3b80d41b2 Date: 2017-11-06 11:36 +0100 http://bitbucket.org/pypy/pypy/changeset/7fb3b80d41b2/ Log: add a comment diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -10,6 +10,8 @@ def get_list_of_files(shared): files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped files.remove(shared.join('libbacktrace', 'config-x86_32.h')) files.remove(shared.join('libbacktrace', 'config-x86_64.h')) files.remove(shared.join('libbacktrace', 'gstdint.h')) From pypy.commits at gmail.com Mon Nov 6 11:15:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 08:15:42 -0800 (PST) Subject: [pypy-commit] pypy default: Add testrunner/get_info.py script for the buildbot Message-ID: <5a008aae.15981c0a.a4939.77ae@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92955:e68c2a6d0069 Date: 2017-11-06 16:15 +0000 http://bitbucket.org/pypy/pypy/changeset/e68c2a6d0069/ Log: Add testrunner/get_info.py script for the buildbot diff --git a/testrunner/get_info.py b/testrunner/get_info.py new file mode 100644 --- /dev/null +++ b/testrunner/get_info.py @@ -0,0 +1,24 @@ 
+#!/usr/bin/env python +""" +Dump some translation information to stdout as JSON. Used by buildbot. +""" + +import sys +import os +import json + +BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +TARGET_BASENAME = 'pypy-c' + +def make_info_dict(): + target = TARGET_BASENAME + if sys.platform.startswith('win'): + target += '.exe' + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) + return {'target_path': target_path} + +def dump_info(): + return json.dumps(make_info_dict()) + +if __name__ == '__main__': + print dump_info() From pypy.commits at gmail.com Mon Nov 6 11:54:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 08:54:28 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a0093c4.d5301c0a.a92b1.fe09@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92956:ed4ba7032f9d Date: 2017-11-06 16:53 +0000 http://bitbucket.org/pypy/pypy/changeset/ed4ba7032f9d/ Log: hg merge default diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -185,6 +185,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -82,46 +82,5 @@ assert res == 1 self.check_resops(call_r=1) # for the case B(), but not for the case A() - def test_keepalive(self): - py.test.skip("XXX fails") # hum, I think the test itself is broken - # - mydriver = JitDriver(reds = ['n', 'states'], greens = []) - class State: - num = 1 - class X: - def __init__(self, state): - self.state = state - def __del__(self): - self.state.num += 1 - @dont_look_inside - def do_stuff(): - pass - def f(n): - states = [] - while n > 0: - mydriver.jit_merge_point(n=n, states=states) - state = State() - states.append(state) - x = X(state) - do_stuff() - state.num *= 1000 - do_stuff() - keepalive_until_here(x) - n -= 1 - return states - def main(n): - states = f(n) - rgc.collect() - rgc.collect() - err = 1001 - for state in states: - if state.num != 1001: - err = state.num - print 'ERROR:', err - return err - assert main(20) == 1001 - res = self.meta_interp(main, [20]) - assert res == 1001 - class TestLLtype(DelTests, LLJitMixin): pass diff --git a/testrunner/get_info.py b/testrunner/get_info.py new file mode 100644 --- /dev/null +++ b/testrunner/get_info.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +""" +Dump some translation information to stdout as JSON. Used by buildbot. 
+""" + +import sys +import os +import json + +BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +TARGET_BASENAME = 'pypy-c' + +def make_info_dict(): + target = TARGET_BASENAME + if sys.platform.startswith('win'): + target += '.exe' + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) + return {'target_path': target_path} + +def dump_info(): + return json.dumps(make_info_dict()) + +if __name__ == '__main__': + print dump_info() From pypy.commits at gmail.com Mon Nov 6 12:04:51 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 09:04:51 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: one more refactor Message-ID: <5a009633.94ae1c0a.4c38f.806f@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92958:5f1804f818b4 Date: 2017-11-06 18:04 +0100 http://bitbucket.org/pypy/pypy/changeset/5f1804f818b4/ Log: one more refactor diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -60,30 +60,23 @@ assert self.rpy_entry_point() == 0 -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - code = MyCode() + def entry_point(self): + code = self.MyCode() rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) #, gcpolicy="minimark") - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 def test_enable(): From pypy.commits at gmail.com Mon Nov 6 12:04:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 09:04:49 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: WIP: rvmprof tests are a 90% one the copy of another, but they are a tangled mess. Start to refactor into a more manageable structure Message-ID: <5a009631.95091c0a.dea7.22df@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92957:6847b345ac78 Date: 2017-11-06 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/6847b345ac78/ Log: WIP: rvmprof tests are a 90% one the copy of another, but they are a tangled mess. 
Start to refactor into a more manageable structure diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -7,57 +7,57 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype +class RVMProfTest: -def test_vmprof_execute_code_1(): + class MyCode: pass - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + def setup_method(self, meth): + self.register() + self.rpy_entry_point = compile(self.entry_point, []) - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def register(self): + try: + rvmprof.register_code_object_class(self.MyCode, + lambda code: 'some code') + except rvmprof.VMProfPlatformUnsupported as e: + py.test.skip(str(e)) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) - assert res == 42 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 - - -def test_vmprof_execute_code_2(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass - - class A: - pass - - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() - - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 - - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 def test_register_code(): @@ -82,7 +82,7 @@ return 0 assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") + fn = compile(f, []) #, gcpolicy="minimark") assert fn() == 0 @@ -193,6 +193,7 @@ fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) num = 10000 period = 0.0001 + rvmprof.enable(fd, period, native=1) for i in range(num): res = main(code, 3) diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Mon Nov 6 13:22:41 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 10:22:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix TARGET_BASENAME for pypy3 Message-ID: <5a00a871.26acdf0a.2f304.11e9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92959:585896fe6599 Date: 2017-11-06 18:22 +0000 http://bitbucket.org/pypy/pypy/changeset/585896fe6599/ Log: Fix TARGET_BASENAME for 
pypy3 diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -8,7 +8,7 @@ import json BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -TARGET_BASENAME = 'pypy-c' +TARGET_BASENAME = 'pypy3-c' def make_info_dict(): target = TARGET_BASENAME From pypy.commits at gmail.com Tue Nov 7 13:39:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 07 Nov 2017 10:39:22 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Bail early in .startswith() and .endswith() if start is past the end of the string. Message-ID: <5a01fdda.46901c0a.9954e.7bc5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92960:1233d5aa782f Date: 2017-11-07 18:38 +0000 http://bitbucket.org/pypy/pypy/changeset/1233d5aa782f/ Log: Bail early in .startswith() and .endswith() if start is past the end of the string. This prevents an overflow, followed by a segfault, in rpython.rlib.rstring.startswith() when start is close to sys.maxint.
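        In plain Python terms, the guard amounts to the sketch below (the helper name safe_startswith is hypothetical; the actual change is the two-line check added to _startswith and _endswith in the diff that follows):

            def safe_startswith(value, prefix, start, end):
                # Bail out before any index arithmetic: if start is already past
                # the end of the string there is nothing to match, and returning
                # False here sidesteps the offset computation in
                # rpython.rlib.rstring.startswith() that can overflow when start
                # is close to sys.maxint.
                if start > len(value):
                    return False
                return value.startswith(prefix, start, end)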
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -628,6 +628,8 @@ def _startswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) + if start > len(value): + return False return startswith(value, prefix, start, end) def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): @@ -653,6 +655,8 @@ def _endswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) + if start > len(value): + return False return endswith(value, prefix, start, end) def _strip(self, space, w_chars, left, right, name='strip'): From pypy.commits at gmail.com Tue Nov 7 14:12:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 07 Nov 2017 11:12:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove explicit refcount checks from _testcapi Message-ID: <5a02058b.03251c0a.283b7.48cf@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92961:aa87739bdc0a Date: 2017-11-07 19:11 +0000 http://bitbucket.org/pypy/pypy/changeset/aa87739bdc0a/ Log: Remove explicit refcount checks from _testcapi diff --git a/lib_pypy/_testcapimodule.c b/lib_pypy/_testcapimodule.c --- a/lib_pypy/_testcapimodule.c +++ b/lib_pypy/_testcapimodule.c @@ -915,12 +915,6 @@ return -1; } Py_DECREF(res); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in successful " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_INCREF(arg); res = Py_BuildValue(fmt, raise_error, NULL, arg); @@ -930,12 +924,6 @@ return -1; } PyErr_Clear(); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in failed " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_DECREF(arg); return 0; } @@ -958,10 +946,6 @@ return raiseTestError("test_buildvalue_N", "Py_BuildValue(\"N\") returned wrong result"); } - if (Py_REFCNT(arg) != 2) { - return raiseTestError("test_buildvalue_N", - "arg was not decrefed in Py_BuildValue(\"N\")"); - } Py_DECREF(res); Py_DECREF(arg); From pypy.commits at gmail.com Tue Nov 7 19:40:17 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:17 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: apparently, gc='minimark' is not needed for this test. Not sure why it was written like that Message-ID: <5a025271.42da1c0a.356df.ce91@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92962:5cc71a3d3d71 Date: 2017-11-07 12:07 +0100 http://bitbucket.org/pypy/pypy/changeset/5cc71a3d3d71/ Log: apparently, gc='minimark' is not needed for this test. 
Not sure why it was written like that diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -131,7 +131,7 @@ assert f() == 0 assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") + fn = compile(f, []) assert fn() == 0 try: check_profile(tmpfilename) From pypy.commits at gmail.com Tue Nov 7 19:40:19 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:19 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: refactor test_enable to use the new style of testing Message-ID: <5a025273.424a1c0a.62b2c.5865@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92963:1067112a9755 Date: 2017-11-07 15:48 +0100 http://bitbucket.org/pypy/pypy/changeset/1067112a9755/ Log: refactor test_enable to use the new style of testing diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,4 +1,5 @@ import py, os +import pytest from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile @@ -7,18 +8,22 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype -class RVMProfTest: + at pytest.mark.usefixtures('init') +class RVMProfTest(object): class MyCode: pass - def setup_method(self, meth): + @pytest.fixture + def init(self): self.register() self.rpy_entry_point = compile(self.entry_point, []) def register(self): + def get_name(code): + return 'py:code:52:x' + try: - rvmprof.register_code_object_class(self.MyCode, - lambda code: 'some code') + rvmprof.register_code_object_class(self.MyCode, get_name) except rvmprof.VMProfPlatformUnsupported as e: py.test.skip(str(e)) @@ -79,19 +84,17 @@ assert self.rpy_entry_point() == 0 -def test_enable(): +class TestEnable(RVMProfTest): - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(TestEnable, self).init() - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num s = 0 for i in range(num): @@ -100,15 +103,16 @@ print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def entry_point(self): + def get_name(code): + return 'py:code:52:x' - def f(): if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) - code = MyCode() + code = self.MyCode() rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) if we_are_translated(): num = 100000000 period = 0.0001 @@ -116,28 +120,24 @@ num = 10000 period = 0.9 rvmprof.enable(fd, period) - res = main(code, num) + res = self.main(code, num) #assert res == 499999500000 rvmprof.disable() os.close(fd) return 0 - def check_profile(filename): + def test(self): from vmprof import read_profile - - prof = read_profile(filename) + assert self.entry_point() == 0 + assert 
self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point() == 0 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) assert prof.get_tree().name.startswith("py:") assert prof.get_tree().count - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, []) - assert fn() == 0 - try: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) def test_native(): eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], From pypy.commits at gmail.com Tue Nov 7 19:40:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:26 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: apparently, we don't need this Message-ID: <5a02527a.21b9df0a.93d86.cbd4@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92966:0944d36d3dda Date: 2017-11-07 16:53 +0100 http://bitbucket.org/pypy/pypy/changeset/0944d36d3dda/ Log: apparently, we don't need this diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -3,8 +3,6 @@ from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype @@ -100,9 +98,6 @@ ENTRY_POINT_ARGS = (int, float) def entry_point(self, count, period): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) From pypy.commits at gmail.com Tue Nov 7 19:40:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:22 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: make it possible to specify a name when you create MyCode() Message-ID: <5a025276.05c4df0a.f10f0.76d1@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92964:2cc191d05d43 Date: 2017-11-07 15:54 +0100 http://bitbucket.org/pypy/pypy/changeset/2cc191d05d43/ Log: make it possible to specify a name when you create MyCode() diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -11,7 +11,12 @@ @pytest.mark.usefixtures('init') class RVMProfTest(object): - class MyCode: pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name + + def get_name(self): + return self.name @pytest.fixture def init(self): @@ -19,11 +24,9 @@ self.rpy_entry_point = compile(self.entry_point, []) def register(self): - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(self.MyCode, get_name) + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) except rvmprof.VMProfPlatformUnsupported as e: py.test.skip(str(e)) @@ -104,14 +107,11 @@ return s def entry_point(self): - def get_name(code): - return 'py:code:52:x' - if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) - code = self.MyCode() - rvmprof.register_code(code, get_name) + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, 
self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) if we_are_translated(): num = 100000000 @@ -135,8 +135,9 @@ assert self.rpy_entry_point() == 0 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert tree.count def test_native(): From pypy.commits at gmail.com Tue Nov 7 19:40:28 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:28 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: improve test_enable by: 1) make sure that it runs for approximately 0.5 seconds; 2) check that the number of profiles is what we expect Message-ID: <5a02527c.53d71c0a.3c4c6.f084@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92967:6c26abf30648 Date: 2017-11-08 01:21 +0100 http://bitbucket.org/pypy/pypy/changeset/6c26abf30648/ Log: improve test_enable by: 1) make sure that it runs for approximately 0.5 seconds; 2) check that the number of profiles is what we expect diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,5 +1,6 @@ import py, os import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile @@ -89,6 +90,10 @@ class RVMProfSamplingTest(RVMProfTest): + # the kernel will deliver SIGPROF at max 250 Hz. See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 + @pytest.fixture def init(self, tmpdir): self.tmpdir = tmpdir @@ -97,41 +102,44 @@ super(RVMProfSamplingTest, self).init() ENTRY_POINT_ARGS = (int, float) - def entry_point(self, count, period): + def entry_point(self, value, delta_t): code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - rvmprof.enable(fd, period) - res = self.main(code, count) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) rvmprof.disable() os.close(fd) return res + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff class TestEnable(RVMProfSamplingTest): @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): - print count s = 0 for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s def test(self): from vmprof import read_profile - assert self.entry_point(10**4, 0.9) == 99990000 + assert self.entry_point(10**4, 0.1) == 99990000 assert self.tmpfile.check() self.tmpfile.remove() # - assert self.rpy_entry_point(10**8, 0.0001) == 9999999900000000 + assert self.rpy_entry_point(10**4, 0.5) == 99990000 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) tree = prof.get_tree() assert tree.name == 'py:code:52:test_enable' - assert tree.count + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) def test_native(): From pypy.commits at gmail.com Tue Nov 7 19:40:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:24 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: factor out some reusable logic from TestEnabled, which will be usable also from the upcoming TestNative Message-ID: 
<5a025278.53d71c0a.3c4c6.f07a@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92965:70e2f742d15e Date: 2017-11-07 16:53 +0100 http://bitbucket.org/pypy/pypy/changeset/70e2f742d15e/ Log: factor out some reusable logic from TestEnabled, which will be usable also from the upcoming TestNative diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -11,6 +11,8 @@ @pytest.mark.usefixtures('init') class RVMProfTest(object): + ENTRY_POINT_ARGS = () + class MyCode(object): def __init__(self, name='py:code:0:noname'): self.name = name @@ -21,7 +23,7 @@ @pytest.fixture def init(self): self.register() - self.rpy_entry_point = compile(self.entry_point, []) + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) def register(self): try: @@ -87,52 +89,49 @@ assert self.rpy_entry_point() == 0 -class TestEnable(RVMProfTest): +class RVMProfSamplingTest(RVMProfTest): @pytest.fixture def init(self, tmpdir): self.tmpdir = tmpdir self.tmpfile = tmpdir.join('profile.vmprof') self.tmpfilename = str(self.tmpfile) - super(TestEnable, self).init() + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) - def main(self, code, num): - print num - s = 0 - for i in range(num): - s += (i << 1) - if s % 2123423423 == 0: - print s - return s - - def entry_point(self): + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, count, period): if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 - else: - num = 10000 - period = 0.9 rvmprof.enable(fd, period) - res = self.main(code, num) - #assert res == 499999500000 + res = self.main(code, count) rvmprof.disable() os.close(fd) - return 0 + return res + + +class TestEnable(RVMProfSamplingTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): + print count + s = 0 + for i in range(count): + s += (i << 1) + if s % 2123423423 == 0: + print s + return s def test(self): from vmprof import read_profile - assert self.entry_point() == 0 + assert self.entry_point(10**4, 0.9) == 99990000 assert self.tmpfile.check() self.tmpfile.remove() # - assert self.rpy_entry_point() == 0 + assert self.rpy_entry_point(10**8, 0.0001) == 9999999900000000 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) tree = prof.get_tree() From pypy.commits at gmail.com Tue Nov 7 19:40:30 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:30 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: rewrite test_native by reusing RVMProfSamplingTest. It still fails, obviously Message-ID: <5a02527e.c6a2df0a.13ea3.b893@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92968:e0fdd6a424df Date: 2017-11-08 01:39 +0100 http://bitbucket.org/pypy/pypy/changeset/e0fdd6a424df/ Log: rewrite test_native by reusing RVMProfSamplingTest. 
It still fails, obviously diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -119,6 +119,7 @@ max_diff = (a+b)/2.0 * tolerance return abs(a-b) < max_diff + class TestEnable(RVMProfSamplingTest): @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) @@ -142,66 +143,44 @@ assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } } + return j; } - return j; - } - """]) + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) + else: + return self.native_func(100) - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -220,12 +199,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - From pypy.commits at gmail.com Tue Nov 7 19:42:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:42:40 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: I claim that tests should never be skipped implicitly. If there is some platform on which vmprof doesn't work, buildbot will tell us and we can skip them explicitly. 
Else the risk is to skip tests which are meant to run, as it happened with test_enable and test_native since forever Message-ID: <5a025300.01141c0a.59d4e.19da@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92969:49caf38340af Date: 2017-11-08 01:42 +0100 http://bitbucket.org/pypy/pypy/changeset/49caf38340af/ Log: I claim that tests should never be skipped implicitly. If there is some platform on which vmprof doesn't work, buildbot will tell us and we can skip them explicitly. Else the risk is to skip tests which are meant to run, as it happened with test_enable and test_native since forever diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -25,11 +25,8 @@ self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) def register(self): - try: - rvmprof.register_code_object_class(self.MyCode, - self.MyCode.get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) class TestExecuteCode(RVMProfTest): From pypy.commits at gmail.com Tue Nov 7 21:59:52 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 07 Nov 2017 18:59:52 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Kill dead code, clean up normalization, and disable an assert that causes C code warnings. Its a helper function for _x_divrem and since d is SHIFT - bits_in_digit, which is always SHIFT or smaller already Message-ID: <5a027328.8cabdf0a.5316d.2ab6@mx.google.com> Author: stian Branch: math-improvements Changeset: r92970:7f48dd825978 Date: 2017-11-08 03:59 +0100 http://bitbucket.org/pypy/pypy/changeset/7f48dd825978/ Log: Kill dead code, clean up normalization, and disable an assert that causes C code warnings. Its a helper function for _x_divrem and since d is SHIFT - bits_in_digit, which is always SHIFT or smaller already diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1459,9 +1459,8 @@ i -= 1 assert i > 0 - if i != self.numdigits(): - self.size = i - if self.numdigits() == 1 and self._digits[0] == NULLDIGIT: + self.size = i + if i == 1 and self._digits[0] == NULLDIGIT: self.sign = 0 self._digits = NULLDIGITS @@ -1940,103 +1939,6 @@ ret._normalize() return ret -""" (*) Why adding t3 can't "run out of room" above. - -Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts -to start with: - -1. For any integer i, i = c(i/2) + f(i/2). In particular, - bsize = c(bsize/2) + f(bsize/2). -2. shift = f(bsize/2) -3. asize <= bsize -4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this - routine, so asize > bsize/2 >= f(bsize/2) in this routine. - -We allocated asize + bsize result digits, and add t3 into them at an offset -of shift. This leaves asize+bsize-shift allocated digit positions for t3 -to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = -asize + c(bsize/2) available digit positions. - -bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has -at most c(bsize/2) digits + 1 bit. - -If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) -digits, and al has at most f(bsize/2) digits in any case. So ah+al has at -most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. - -The product (ah+al)*(bh+bl) therefore has at most - - c(bsize/2) + (asize == bsize ? 
c(bsize/2) : f(bsize/2)) digits + 2 bits - -and we have asize + c(bsize/2) available digit positions. We need to show -this is always enough. An instance of c(bsize/2) cancels out in both, so -the question reduces to whether asize digits is enough to hold -(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, -then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, -asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 -digit is enough to hold 2 bits. This is so since SHIFT=15 >= 2. If -asize == bsize, then we're asking whether bsize digits is enough to hold -c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits -is enough to hold 2 bits. This is so if bsize >= 2, which holds because -bsize >= KARATSUBA_CUTOFF >= 2. - -Note that since there's always enough room for (ah+al)*(bh+bl), and that's -clearly >= each of ah*bh and al*bl, there's always enough room to subtract -ah*bh and al*bl too. -""" - -def _k_lopsided_mul(a, b): - # Not in use anymore, only account for like 1% performance. Perhaps if we - # Got rid of the extra list allocation this would be more effective. - """ - b has at least twice the digits of a, and a is big enough that Karatsuba - would pay off *if* the inputs had balanced sizes. View b as a sequence - of slices, each with a->ob_size digits, and multiply the slices by a, - one at a time. This gives k_mul balanced inputs to work with, and is - also cache-friendly (we compute one double-width slice of the result - at a time, then move on, never bactracking except for the helpful - single-width slice overlap between successive partial sums). - """ - asize = a.numdigits() - bsize = b.numdigits() - # nbdone is # of b digits already multiplied - - assert asize > KARATSUBA_CUTOFF - assert 2 * asize <= bsize - - # Allocate result space, and zero it out. - ret = rbigint([NULLDIGIT] * (asize + bsize), 1) - - # Successive slices of b are copied into bslice. - #bslice = rbigint([0] * asize, 1) - # XXX we cannot pre-allocate, see comments below! - # XXX prevent one list from being created. - bslice = rbigint(sign=1) - - nbdone = 0 - while bsize > 0: - nbtouse = min(bsize, asize) - - # Multiply the next slice of b by a. - - #bslice.digits[:nbtouse] = b.digits[nbdone : nbdone + nbtouse] - # XXX: this would be more efficient if we adopted CPython's - # way to store the size, instead of resizing the list! - # XXX change the implementation, encoding length via the sign. - bslice._digits = b._digits[nbdone : nbdone + nbtouse] - bslice.size = nbtouse - product = _k_mul(a, bslice) - - # Add into result. 
- _v_iadd(ret, nbdone, ret.numdigits() - nbdone, - product, product.numdigits()) - - bsize -= nbtouse - nbdone += nbtouse - - ret._normalize() - return ret - def _inplace_divrem1(pout, pin, n, size=0): """ Divide bigint pin by non-zero digit n, storing quotient @@ -2147,7 +2049,7 @@ """ carry = _unsigned_widen_digit(0) - assert 0 <= d and d < SHIFT + #assert 0 <= d and d < SHIFT i = 0 while i < m: acc = a.uwidedigit(i) << d | carry @@ -2166,7 +2068,7 @@ acc = _unsigned_widen_digit(0) mask = (1 << d) - 1 - assert 0 <= d and d < SHIFT + #assert 0 <= d and d < SHIFT i = m-1 while i >= 0: acc = (carry << SHIFT) | a.uwidedigit(i) From pypy.commits at gmail.com Tue Nov 7 22:02:15 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 07 Nov 2017 19:02:15 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Kill test for removed function Message-ID: <5a0273b7.a8a0df0a.d0ea3.3672@mx.google.com> Author: stian Branch: math-improvements Changeset: r92971:b9cf8efa4db1 Date: 2017-11-08 04:01 +0100 http://bitbucket.org/pypy/pypy/changeset/b9cf8efa4db1/ Log: Kill test for removed function diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -616,7 +616,7 @@ assert res3 == -num << z assert res4 == -num >> z - # Large digit + # Large digit, also invertion test. for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): f1 = rbigint.fromlong(x) nf1 = rbigint.fromlong(-x) @@ -871,14 +871,6 @@ ret = lobj._k_mul(f1, f2) assert ret.tolong() == f1.tolong() * f2.tolong() - def test__k_lopsided_mul(self): - digs_a = KARATSUBA_CUTOFF + 3 - digs_b = 3 * digs_a - f1 = bigint([lobj.MASK] * digs_a, 1) - f2 = bigint([lobj.MASK] * digs_b, 1) - ret = lobj._k_lopsided_mul(f1, f2) - assert ret.tolong() == f1.tolong() * f2.tolong() - def test_longlong(self): max = 1L << (r_longlong.BITS-1) f1 = rbigint.fromlong(max-1) # fits in r_longlong From pypy.commits at gmail.com Wed Nov 8 11:47:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:47:49 -0800 (PST) Subject: [pypy-commit] pypy default: merge the vmprof-0.4.10 branch: Message-ID: <5a033535.82d91c0a.e573f.9d49@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92973:4b7ad9d4be0d Date: 2017-11-08 17:47 +0100 http://bitbucket.org/pypy/pypy/changeset/4b7ad9d4be0d/ Log: merge the vmprof-0.4.10 branch: - copy the recent changes to the C part of vmprof from github - make sure that the tests are actually testing something: so far, most of the were just silently skipped on the nightly buildbot :( - test_native is broken: it has been broken since the merge of vmprof-0.4.8, but we didn't notice - I expect some tests to fail on weird architectures. 
Once we know which, we can explicitly skip them diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + 
namelen); + return 0; +} int vmp_write_all(const char *buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,25 +2,43 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = get_list_of_files(shared) + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,214 +1,183 @@ import py, os +import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype + at pytest.mark.usefixtures('init') +class RVMProfTest(object): -def test_vmprof_execute_code_1(): + ENTRY_POINT_ARGS = () - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def get_name(self): + return self.name + + @pytest.fixture + def init(self): + self.register() + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) + + def register(self): + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) + return 0 + + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): + print num + return 42 + + def entry_point(self): + code = self.MyCode() + rvmprof.register_code(code, lambda code: 'some code') + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 -def test_vmprof_execute_code_2(): +class RVMProfSamplingTest(RVMProfTest): - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + # the kernel will deliver SIGPROF at max 250 Hz. 
See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 - class A: - pass + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, value, delta_t): + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, self.MyCode.get_name) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) + rvmprof.disable() + os.close(fd) + return res - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 +class TestEnable(RVMProfSamplingTest): -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num - return 42 - - def f(): - code = MyCode() - rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) - assert res == 42 - return 0 - - assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - - -def test_enable(): - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): s = 0 - for i in range(num): + for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def test(self): + from vmprof import read_profile + assert self.entry_point(10**4, 0.1) == 99990000 + assert self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point(10**4, 0.5) == 99990000 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 + +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } + } + return j; + } + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) + + @rvmprof.vmprof_execute_code("xcode1", lambda self, 
code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) else: - num = 10000 - period = 0.9 - rvmprof.enable(fd, period) - res = main(code, num) - #assert res == 499999500000 - rvmprof.disable() - os.close(fd) - return 0 + return self.native_func(100) - def check_profile(filename): - from vmprof import read_profile - - prof = read_profile(filename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count - - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; - } - } - return j; - } - """]) - - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. 
from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -227,16 +196,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Wed Nov 8 11:47:47 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:47:47 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: close this branch to be merged Message-ID: <5a033533.cc8ddf0a.830c6.4563@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92972:1b871922f356 Date: 2017-11-08 17:41 +0100 http://bitbucket.org/pypy/pypy/changeset/1b871922f356/ Log: close this branch to be merged From pypy.commits at gmail.com Wed Nov 8 11:49:48 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:49:48 -0800 (PST) Subject: [pypy-commit] pypy default: update vmprof up to github rev c8154361 Message-ID: <5a0335ac.c97e1c0a.2b7ff.b964@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92974:b207c72d71ad Date: 2017-11-08 17:49 +0100 http://bitbucket.org/pypy/pypy/changeset/b207c72d71ad/ Log: update vmprof up to github rev c8154361 diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -28,7 +28,7 @@ #elif __linux__ return "linux"; #elif __FreeBSD__ - return "freebsd" + return "freebsd"; #else #error "Unknown compiler" #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -29,6 +29,7 @@ static int (*unw_is_signal_frame)(unw_cursor_t *) = NULL; static int (*unw_getcontext)(unw_context_t *) = NULL; #else +#define UNW_LOCAL_ONLY #include #endif From pypy.commits at gmail.com Wed Nov 8 13:17:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 08 Nov 2017 10:17:13 -0800 (PST) Subject: [pypy-commit] pypy default: Check behaviour of bytearray as well Message-ID: <5a034a29.88acdf0a.4f669.2b89@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92975:461d62b49f22 Date: 2017-11-08 18:16 +0000 http://bitbucket.org/pypy/pypy/changeset/461d62b49f22/ Log: Check behaviour of bytearray as well diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py --- a/extra_tests/test_bytes.py +++ b/extra_tests/test_bytes.py @@ -1,25 +1,27 @@ from hypothesis import strategies as st from hypothesis import given, example - at given(st.binary(), st.binary(), st.binary()) +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, 
st_bytestring) def test_find(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.find(u) <= len(prefix) assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_index(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.index(u) <= len(prefix) assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rfind(u, prefix, suffix): s = prefix + u + suffix assert s.rfind(u) >= len(prefix) assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rindex(u, prefix, suffix): s = prefix + u + suffix assert s.rindex(u) >= len(prefix) @@ -34,20 +36,20 @@ start = max(start + len(u), 0) return start, end - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_startswith_basic(u, v): assert u.startswith(v) is (u[:len(v)] == v) @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_startswith_start(u, v, start): expected = u[start:].startswith(v) if v else (start <= len(u)) assert u.startswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_startswith_3(u, v, start, end): if v: expected = u[start:end].startswith(v) @@ -56,7 +58,7 @@ expected = start0 <= len(u) and start0 <= end0 assert u.startswith(v, start, end) is expected - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_endswith_basic(u, v): if len(v) > len(u): assert u.endswith(v) is False @@ -65,14 +67,14 @@ @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_endswith_2(u, v, start): expected = u[start:].endswith(v) if v else (start <= len(u)) assert u.endswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_endswith_3(u, v, start, end): if v: expected = u[start:end].endswith(v) From pypy.commits at gmail.com Wed Nov 8 13:34:04 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 10:34:04 -0800 (PST) Subject: [pypy-commit] pypy default: remove unused distutils_platform Message-ID: <5a034e1c.c6a2df0a.13ea3.517e@mx.google.com> Author: Matti Picus Branch: Changeset: r92976:3d3ef332444f Date: 2017-11-08 20:28 +0200 http://bitbucket.org/pypy/pypy/changeset/3d3ef332444f/ Log: remove unused distutils_platform diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -320,9 +320,7 @@ else: host_factory = Cygwin64 else: - # pray - from rpython.translator.platform.distutils_platform import DistutilsPlatform - host_factory = DistutilsPlatform + raise ValueError('unknown sys.platform "%s"', sys.platform) platform = host = host_factory() @@ -335,9 +333,6 @@ elif new_platform == 'arm': from 
rpython.translator.platform.arm import ARM return ARM(cc) - elif new_platform == 'distutils': - from rpython.translator.platform.distutils_platform import DistutilsPlatform - return DistutilsPlatform() else: raise ValueError("platform = %s" % (new_platform,)) diff --git a/rpython/translator/platform/distutils_platform.py b/rpython/translator/platform/distutils_platform.py deleted file mode 100644 --- a/rpython/translator/platform/distutils_platform.py +++ /dev/null @@ -1,157 +0,0 @@ -import py, os, sys - -from rpython.translator.platform import Platform, log, CompilationError -from rpython.translator.tool import stdoutcapture - -def log_spawned_cmd(spawn): - def spawn_and_log(cmd, *args, **kwds): - log.execute(' '.join(cmd)) - return spawn(cmd, *args, **kwds) - return spawn_and_log - -CFLAGS = ['-O3'] - -if os.name != 'nt': - so_ext = 'so' -else: - so_ext = 'dll' - -class DistutilsPlatform(Platform): - """ This is a generic distutils platform. I hope it'll go away at some - point soon completely - """ - name = "distutils" - so_ext = so_ext - - def __init__(self, cc=None): - self.cc = cc - if self.name == "distutils": - self.name = sys.platform - - def _ensure_correct_math(self): - if self.name != 'win32': - return # so far - from distutils import sysconfig - gcv = sysconfig.get_config_vars() - opt = gcv.get('OPT') # not always existent - if opt and '/Op' not in opt: - opt += '/Op' - gcv['OPT'] = opt - - def compile(self, cfilenames, eci, outputfilename=None, standalone=True): - self._ensure_correct_math() - self.cfilenames = cfilenames - if standalone: - ext = '' - else: - ext = so_ext - self.standalone = standalone - self.libraries = list(eci.libraries) - self.include_dirs = list(eci.include_dirs) - self.library_dirs = list(eci.library_dirs) - self.compile_extra = list(eci.compile_extra) - self.link_extra = list(eci.link_extra) - self.frameworks = list(eci.frameworks) - if not self.name in ('win32', 'darwin', 'cygwin'): # xxx - if 'm' not in self.libraries: - self.libraries.append('m') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - if 'pthread' not in self.libraries: - self.libraries.append('pthread') - if self.name != 'sunos5': - self.compile_extra += ['-pthread'] - self.link_extra += ['-pthread'] - else: - self.compile_extra += ['-pthreads'] - self.link_extra += ['-lpthread'] - if self.name == 'win32': - self.link_extra += ['/DEBUG'] # generate .pdb file - if self.name == 'darwin': - # support Fink & Darwinports - for s in ('/sw/', '/opt/local/'): - if s + 'include' not in self.include_dirs and \ - os.path.exists(s + 'include'): - self.include_dirs.append(s + 'include') - if s + 'lib' not in self.library_dirs and \ - os.path.exists(s + 'lib'): - self.library_dirs.append(s + 'lib') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - for framework in self.frameworks: - self.link_extra += ['-framework', framework] - - if outputfilename is None: - self.outputfilename = py.path.local(cfilenames[0]).new(ext=ext) - else: - self.outputfilename = py.path.local(outputfilename) - self.eci = eci - import distutils.errors - basename = self.outputfilename.new(ext='') - data = '' - try: - saved_environ = os.environ.copy() - c = stdoutcapture.Capture(mixed_out_err=True) - try: - self._build() - finally: - # workaround for a distutils bugs where some env vars can - # become longer and longer every time it is used - for key, value in saved_environ.items(): - if os.environ.get(key) != value: - os.environ[key] = value - foutput, foutput = c.done() - data = foutput.read() - if data: - 
fdump = basename.new(ext='errors').open("wb") - fdump.write(data) - fdump.close() - except (distutils.errors.CompileError, - distutils.errors.LinkError): - raise CompilationError('', data) - except: - print >>sys.stderr, data - raise - return self.outputfilename - - def _build(self): - from distutils.ccompiler import new_compiler - from distutils import sysconfig - compiler = new_compiler(force=1) - if self.cc is not None: - for c in '''compiler compiler_so compiler_cxx - linker_exe linker_so'''.split(): - compiler.executables[c][0] = self.cc - if not self.standalone: - sysconfig.customize_compiler(compiler) # XXX - compiler.spawn = log_spawned_cmd(compiler.spawn) - objects = [] - for cfile in self.cfilenames: - cfile = py.path.local(cfile) - compile_extra = self.compile_extra[:] - - old = cfile.dirpath().chdir() - try: - res = compiler.compile([cfile.basename], - include_dirs=self.eci.include_dirs, - extra_preargs=compile_extra) - assert len(res) == 1 - cobjfile = py.path.local(res[0]) - assert cobjfile.check() - objects.append(str(cobjfile)) - finally: - old.chdir() - - if self.standalone: - cmd = compiler.link_executable - else: - cmd = compiler.link_shared_object - cmd(objects, str(self.outputfilename), - libraries=self.eci.libraries, - extra_preargs=self.link_extra, - library_dirs=self.eci.library_dirs) - - def _include_dirs_for_libffi(self): - return ['/usr/include/libffi'] - - def _library_dirs_for_libffi(self): - return ['/usr/lib/libffi'] - diff --git a/rpython/translator/platform/test/test_distutils.py b/rpython/translator/platform/test/test_distutils.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_distutils.py +++ /dev/null @@ -1,17 +0,0 @@ - -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.platform.distutils_platform import DistutilsPlatform -import py - -class TestDistutils(BasicTest): - platform = DistutilsPlatform() - - def test_nice_errors(self): - py.test.skip("Unsupported") - - def test_900_files(self): - py.test.skip('Makefiles not suppoerted') - - def test_precompiled_headers(self): - py.test.skip('Makefiles not suppoerted') - From pypy.commits at gmail.com Wed Nov 8 15:00:34 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 12:00:34 -0800 (PST) Subject: [pypy-commit] pypy default: remove maemo platform Message-ID: <5a036262.21b9df0a.93d86.9fb8@mx.google.com> Author: Matti Picus Branch: Changeset: r92977:73ab8f585ba4 Date: 2017-11-08 21:59 +0200 http://bitbucket.org/pypy/pypy/changeset/73ab8f585ba4/ Log: remove maemo platform diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -39,9 +39,7 @@ CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) PLATFORMS = [ - 'maemo', 'host', - 'distutils', 'arm', ] diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -1102,22 +1102,6 @@ assert out.strip() == 'ok' -class TestMaemo(TestStandalone): - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - from rpython.translator.platform.maemo import check_scratchbox - check_scratchbox() - config = get_combined_translation_config(translating=True) - config.translation.platform = 'maemo' - cls.config = config - - def test_profopt(self): - py.test.skip("Unsupported") - - def 
test_prof_inline(self): - py.test.skip("Unsupported") - - class TestThread(object): gcrootfinder = 'shadowstack' config = None diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -327,9 +327,6 @@ def pick_platform(new_platform, cc): if new_platform == 'host': return host_factory(cc) - elif new_platform == 'maemo': - from rpython.translator.platform.maemo import Maemo - return Maemo(cc) elif new_platform == 'arm': from rpython.translator.platform.arm import ARM return ARM(cc) diff --git a/rpython/translator/platform/maemo.py b/rpython/translator/platform/maemo.py deleted file mode 100644 --- a/rpython/translator/platform/maemo.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Support for Maemo.""" - -import py, os - -from rpython.tool.udir import udir -from rpython.translator.platform import ExecutionResult, log -from rpython.translator.platform.linux import Linux -from rpython.translator.platform.posix import GnuMakefile, _run_subprocess - -def check_scratchbox(): - # in order to work, that file must exist and be executable by us - if not os.access('/scratchbox/login', os.X_OK): - py.test.skip("No scratchbox detected") - -class Maemo(Linux): - name = "maemo" - - available_includedirs = ('/usr/include', '/tmp') - copied_cache = {} - - def _invent_new_name(self, basepath, base): - pth = basepath.join(base) - num = 0 - while pth.check(): - pth = basepath.join('%s_%d' % (base,num)) - num += 1 - return pth.ensure(dir=1) - - def _copy_files_to_new_dir(self, dir_from, pattern='*.[ch]'): - try: - return self.copied_cache[dir_from] - except KeyError: - new_dirpath = self._invent_new_name(udir, 'copied_includes') - files = py.path.local(dir_from).listdir(pattern) - for f in files: - f.copy(new_dirpath) - # XXX - srcdir = py.path.local(dir_from).join('src') - if srcdir.check(dir=1): - target = new_dirpath.join('src').ensure(dir=1) - for f in srcdir.listdir(pattern): - f.copy(target) - # XXX - self.copied_cache[dir_from] = new_dirpath - return new_dirpath - - def _preprocess_include_dirs(self, include_dirs): - """ Tweak includedirs so they'll be available through scratchbox - """ - res_incl_dirs = [] - for incl_dir in include_dirs: - incl_dir = py.path.local(incl_dir) - for available in self.available_includedirs: - if incl_dir.relto(available): - res_incl_dirs.append(str(incl_dir)) - break - else: - # we need to copy files to a place where it's accessible - res_incl_dirs.append(self._copy_files_to_new_dir(incl_dir)) - return res_incl_dirs - - def _execute_c_compiler(self, cc, args, outname): - log.execute('/scratchbox/login ' + cc + ' ' + ' '.join(args)) - args = [cc] + args - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args) - self._handle_error(returncode, stdout, stderr, outname) - - def execute(self, executable, args=[], env=None): - if isinstance(args, str): - args = str(executable) + ' ' + args - log.message('executing /scratchbox/login ' + args) - else: - args = [str(executable)] + args - log.message('executing /scratchbox/login ' + ' '.join(args)) - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args, - env) - return ExecutionResult(returncode, stdout, stderr) - - def _include_dirs_for_libffi(self): - # insanely obscure dir - return ['/usr/include/arm-linux-gnueabi/'] - - def _library_dirs_for_libffi(self): - # on the other hand, library lands in usual place... 
- return [] - - def execute_makefile(self, path_to_makefile, extra_opts=[]): - if isinstance(path_to_makefile, GnuMakefile): - path = path_to_makefile.makefile_dir - else: - path = path_to_makefile - log.execute('make %s in %s' % (" ".join(extra_opts), path)) - returncode, stdout, stderr = _run_subprocess( - '/scratchbox/login', ['make', '-C', str(path)] + extra_opts) - self._handle_error(returncode, stdout, stderr, path.join('make')) diff --git a/rpython/translator/platform/test/test_maemo.py b/rpython/translator/platform/test/test_maemo.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_maemo.py +++ /dev/null @@ -1,37 +0,0 @@ - -""" File containing maemo platform tests -""" - -import py -from rpython.tool.udir import udir -from rpython.translator.platform.maemo import Maemo, check_scratchbox -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.tool.cbuild import ExternalCompilationInfo - -class TestMaemo(BasicTest): - platform = Maemo() - strict_on_stderr = False - - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - check_scratchbox() - - def test_includes_outside_scratchbox(self): - cfile = udir.join('test_includes_outside_scratchbox.c') - cfile.write(''' - #include - #include "test.h" - int main() - { - printf("%d\\n", XXX_STUFF); - return 0; - } - ''') - includedir = py.path.local(__file__).dirpath().join('include') - eci = ExternalCompilationInfo(include_dirs=(includedir,)) - executable = self.platform.compile([cfile], eci) - res = self.platform.execute(executable) - self.check_res(res) - - def test_environment_inheritance(self): - py.test.skip("FIXME") diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -151,6 +151,6 @@ assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('maemo', None) + platform.set_platform('arm', None) assert platform.host != platform.platform assert not platform.is_host_build() From pypy.commits at gmail.com Wed Nov 8 15:07:33 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 12:07:33 -0800 (PST) Subject: [pypy-commit] pypy default: the only possible non-host platform is arm, not sure it works Message-ID: <5a036405.178fdf0a.93bfd.18dd@mx.google.com> Author: Matti Picus Branch: Changeset: r92978:7ba4c7f12fd5 Date: 2017-11-08 22:06 +0200 http://bitbucket.org/pypy/pypy/changeset/7ba4c7f12fd5/ Log: the only possible non-host platform is arm, not sure it works diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -147,10 +147,13 @@ def test_is_host_build(): + from platform import machine from rpython.translator import platform assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('arm', None) - assert platform.host != platform.platform - assert not platform.is_host_build() + # do we support non-host builds? 
+ if machine().startswith('arm'): + platform.set_platform('arm', None) + assert platform.host != platform.platform + assert not platform.is_host_build() From pypy.commits at gmail.com Wed Nov 8 16:14:42 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 13:14:42 -0800 (PST) Subject: [pypy-commit] pypy vmprof-win32: close outdated branch Message-ID: <5a0373c2.d5301c0a.234af.66a6@mx.google.com> Author: Matti Picus Branch: vmprof-win32 Changeset: r92979:7aada6f7b5bb Date: 2017-11-08 23:01 +0200 http://bitbucket.org/pypy/pypy/changeset/7aada6f7b5bb/ Log: close outdated branch From pypy.commits at gmail.com Wed Nov 8 16:14:44 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 13:14:44 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: start to run tests on win32 Message-ID: <5a0373c4.8cabdf0a.5316d.aae7@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92980:edb8f85891e5 Date: 2017-11-08 23:17 +0200 http://bitbucket.org/pypy/pypy/changeset/edb8f85891e5/ Log: start to run tests on win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform.startswith('linux'): + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): diff --git a/rpython/rlib/rvmprof/test/__init__.py b/rpython/rlib/rvmprof/test/__init__.py --- a/rpython/rlib/rvmprof/test/__init__.py +++ b/rpython/rlib/rvmprof/test/__init__.py @@ -1,5 +0,0 @@ -import pytest -import platform - -if not platform.machine().startswith('x86'): - pytest.skip() diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -25,8 +25,9 @@ print for file in files: path = file.relto(shared) + path = path.replace(os.sep, '/') url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) - source = urllib2.urlopen(url).read() + source = urllib2.urlopen(url).read().replace('\r\n', '\n') dest = file.read() shortname = file.relto(RVMPROF) if source == dest: From pypy.commits at gmail.com Wed Nov 8 20:55:39 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 08 Nov 2017 17:55:39 -0800 (PST) Subject: [pypy-commit] pypy default: "eh". On pypy we need to be careful in which order we have pendingblocks. Message-ID: <5a03b59b.c39cdf0a.b72ee.bc53@mx.google.com> Author: fijal Branch: Changeset: r92981:cb9634421fa2 Date: 2017-11-08 17:54 -0800 http://bitbucket.org/pypy/pypy/changeset/cb9634421fa2/ Log: "eh". On pypy we need to be careful in which order we have pendingblocks. Otherwise we end up in a setup where we have blocks a, b and c where a and b are blocked because c needs to add an attribute, but c is never appended since popitem() would always return an a or b. I wonder if the same condition can be repeated on CPython, but I cannot. Unclear how would you write a test for it since it depends on dictionary order. 
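A minimal sketch of the ordering property described in the log above, assuming plain CPython dict/deque semantics; the class and variable names here are illustrative, not the ones used in the changeset below. It shows how a FIFO-ordered pending map guarantees that an entry which is re-added after blocking goes to the back of the queue, so an older pending entry (such as block 'c' in the scenario above) is eventually popped instead of being starved:

    from collections import deque

    class FIFOPending(object):
        # illustrative stand-in for a FIFO-ordered {block: graph} mapping
        def __init__(self):
            self._d = {}
            self._order = deque()
        def __setitem__(self, key, value):
            if key not in self._d:       # a key that is already pending keeps its position
                self._order.append(key)
            self._d[key] = value
        def popitem(self):
            key = self._order.popleft()  # always hand out the oldest pending entry first
            return key, self._d.pop(key)
        def __nonzero__(self):           # Python 2 truth test, as in the annotator code base
            return bool(self._d)

    pending = FIFOPending()
    pending['a'] = 'graph-a'; pending['b'] = 'graph-b'; pending['c'] = 'graph-c'
    k, _ = pending.popitem()             # -> 'a'
    pending['a'] = 'graph-a'             # 'a' blocks again and is re-queued at the back
    k, _ = pending.popitem()             # -> 'b'
    k, _ = pending.popitem()             # -> 'c': 'c' cannot be starved by 'a' and 'b'

With an unordered mapping, popitem() may keep returning whichever re-inserted keys happen to hash first, which is the starvation the changeset below works around.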
diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,10 +15,34 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations +from collections import deque log = AnsiLogger("annrpython") +class ShuffleDict(object): + def __init__(self): + self._d = {} + self.keys = deque() + + def __setitem__(self, k, v): + if k in self._d: + self._d[k] = v + else: + self._d[k] = v + self.keys.append(k) + + def __getitem__(self, k): + return self._d[k] + + def popitem(self): + key = self.keys.popleft() + item = self._d.pop(key) + return (key, item) + + def __nonzero__(self): + return bool(self._d) + class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -33,7 +57,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed From pypy.commits at gmail.com Thu Nov 9 07:38:27 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 04:38:27 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove unused variable and make these size calculations unsigned Message-ID: <5a044c43.d2addf0a.ed1d0.1282@mx.google.com> Author: stian Branch: math-improvements Changeset: r92982:1a7dc37b2d5d Date: 2017-11-09 13:37 +0100 http://bitbucket.org/pypy/pypy/changeset/1a7dc37b2d5d/ Log: Remove unused variable and make these size calculations unsigned diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -848,14 +848,14 @@ mod = self.int_and_(digit - 1) else: # Perform - size = self.numdigits() - 1 + size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: rem = self.widedigit(size) - size -= 1 - while size >= 0: + while size > 0: + size -= 1 rem = ((rem << SHIFT) | self.digit(size)) % digit - size -= 1 + else: rem = self.widedigit(0) % digit @@ -890,13 +890,13 @@ mod = self.int_and_(digit - 1) else: # Perform - size = self.numdigits() - 1 + size = UDIGIT_TYPE(self.numdigits() - 1) + if size > 0: rem = self.widedigit(size) - size -= 1 - while size >= 0: + while size > 0: + size -= 1 rem = ((rem << SHIFT) | self.digit(size)) % digit - size -= 1 else: rem = self.digit(0) % digit @@ -981,7 +981,7 @@ # XXX failed to implement raise ValueError("bigint pow() too negative") - size_b = b.numdigits() + size_b = UDIGIT_TYPE(b.numdigits()) if b.sign == 0: return ONERBIGINT @@ -1040,8 +1040,9 @@ if size_b <= FIVEARY_CUTOFF: # Left-to-right binary exponentiation (HAC Algorithm 14.79) # http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf - size_b -= 1 - while size_b >= 0: + + while size_b > 0: + size_b -= 1 bi = b.digit(size_b) j = 1 << (SHIFT-1) while j != 0: @@ -1049,7 +1050,7 @@ if bi & j: z = _help_mult(z, a, c) j >>= 1 - size_b -= 1 + else: # Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) @@ -1328,7 +1329,7 @@ hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 - inverted = False + while i < newsize: digit = self.udigit(wordshift) if invert and i == 0 and wordshift == 0: From pypy.commits at 
gmail.com Thu Nov 9 13:09:26 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 10:09:26 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Dont need widedigit | widedigit, when widedigit | digit will do. Message-ID: <5a0499d6.05ac1c0a.9ad53.facb@mx.google.com> Author: stian Branch: math-improvements Changeset: r92983:5c8e47fa96a6 Date: 2017-11-09 19:08 +0100 http://bitbucket.org/pypy/pypy/changeset/5c8e47fa96a6/ Log: Dont need widedigit | widedigit, when widedigit | digit will do. diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2049,7 +2049,7 @@ * result in z[0:m], and return the d bits shifted out of the top. """ - carry = _unsigned_widen_digit(0) + carry = 0 #assert 0 <= d and d < SHIFT i = 0 while i < m: @@ -2072,7 +2072,7 @@ #assert 0 <= d and d < SHIFT i = m-1 while i >= 0: - acc = (carry << SHIFT) | a.uwidedigit(i) + acc = (carry << SHIFT) | a.udigit(i) carry = acc & mask z.setdigit(i, acc >> d) i -= 1 @@ -2127,10 +2127,10 @@ else: vtop = v.widedigit(j) << SHIFT #assert vtop <= wm1 - vv = vtop | v.widedigit(abs(j-1)) + vv = vtop | v.digit(abs(j-1)) q = vv / wm1 r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. - vj2 = v.widedigit(abs(j-2)) + vj2 = v.digit(abs(j-2)) while wm2 * q > ((r << SHIFT) | vj2): q -= 1 r += wm1 diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -144,7 +144,7 @@ rl_op2 = rbigint.fromlong(op2) r1 = rl_op1.mod(rl_op2) r2 = op1 % op2 - print op1, op2 + assert r1.tolong() == r2 def test_int_mod(self): From pypy.commits at gmail.com Thu Nov 9 13:12:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 09 Nov 2017 10:12:56 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement __text_signature__ on types Message-ID: <5a049aa8.14a1df0a.ca2b9.3267@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92984:ffe57298623b Date: 2017-11-09 18:12 +0000 http://bitbucket.org/pypy/pypy/changeset/ffe57298623b/ Log: Implement __text_signature__ on types diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -16,7 +16,7 @@ @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, __confirm_applevel_del__=False, - variable_sized=False, **rawdict): + _text_signature_=None, variable_sized=False, **rawdict): "initialization-time only" self.name = __name if __base is None: @@ -36,6 +36,7 @@ assert '__del__' not in rawdict self.weakrefable = '__weakref__' in rawdict self.doc = rawdict.get('__doc__', None) + self.text_signature = _text_signature_ for base in bases: self.hasdict |= base.hasdict self.weakrefable |= base.weakrefable diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py --- a/pypy/objspace/std/objectobject.py +++ b/pypy/objspace/std/objectobject.py @@ -280,6 +280,7 @@ return space.call_function(space.w_list, _objectdir(space, w_obj)) W_ObjectObject.typedef = TypeDef("object", + _text_signature_='()', __doc__ = "The most base type", __new__ = interp2app(descr__new__), __subclasshook__ = interp2app(descr___subclasshook__, as_classmethod=True), diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -543,6 +543,13 @@ 
type(X).__dict__["__doc__"].__delete__(X)) assert X.__doc__ == "banana" + def test_text_signature(self): + assert object.__text_signature__ == '()' + + class A: + pass + assert A.__text_signature__ is None + def test_metaclass_conflict(self): """ class T1(type): diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -185,6 +185,7 @@ self.hasuserdel = False self.weakrefable = False self.w_doc = space.w_None + self.text_signature = None self.weak_subclasses = [] self.flag_heaptype = is_heaptype self.flag_abstract = False @@ -975,6 +976,11 @@ raise oefmt(space.w_TypeError, "can't set %N.__doc__", w_type) w_type.setdictvalue(space, '__doc__', w_value) +def type_get_txtsig(space, w_type): + if w_type.text_signature is None: + return space.w_None + return space.newtext(w_type.text_signature) + def descr__dir(space, w_type): from pypy.objspace.std.util import _classdir return space.call_function(space.w_list, _classdir(space, w_type)) @@ -1062,6 +1068,7 @@ __mro__ = GetSetProperty(descr_get__mro__), __dict__=GetSetProperty(type_get_dict), __doc__ = GetSetProperty(descr__doc, descr_set__doc, cls=W_TypeObject, name='__doc__'), + __text_signature__=GetSetProperty(type_get_txtsig), __dir__ = gateway.interp2app(descr__dir), mro = gateway.interp2app(descr_mro), __flags__ = GetSetProperty(descr__flags), @@ -1271,6 +1278,7 @@ else: w_doc = w_self.space.newtext_or_none(instancetypedef.doc) w_self.w_doc = w_doc + w_self.text_signature = instancetypedef.text_signature ensure_common_attributes(w_self) # # usually 'instancetypedef' is new, i.e. not seen in any base, From pypy.commits at gmail.com Thu Nov 9 13:59:44 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 10:59:44 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: wip - shared files must be fixed upstream then pulled into here Message-ID: <5a04a5a0.09a0df0a.5b3a0.b704@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92985:e68720efe25c Date: 2017-11-09 19:57 +0200 http://bitbucket.org/pypy/pypy/changeset/e68720efe25c/ Log: wip - shared files must be fixed upstream then pulled into here diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -6,10 +6,8 @@ #define SINGLE_BUF_SIZE (8192 - 2 * sizeof(unsigned int)) #ifdef VMPROF_WINDOWS -#include -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef intptr_t ssize_t; +#include "shared/msiinttypes/inttypes.h" +#include "shared/msiinttypes/stdint.h" #else #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,4 +1,8 @@ #include "vmprof_win.h" +#ifdef RPYTHON_VMPROF +#include "common_header.h" +#include "structdef.h" /* for struct pypy_threadlocal_s */ +#endif volatile int thread_started = 0; volatile int enabled = 0; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -1,10 +1,9 @@ #pragma once +#include "compat.h" #include "windows.h" -#include "compat.h" #include "vmp_stack.h" #include - int prepare_concurrent_bufs(void); // This file has been inspired (but not copied from since the LICENSE diff --git 
a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,9 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + compile_flags = [] + #compile_flags = ['-g', '-O0'] + eci = ExternalCompilationInfo(compile_extra=compile_flags, separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Thu Nov 9 13:59:46 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 10:59:46 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: call get_ident to register thread_ident in pypy_threadlocal_s Message-ID: <5a04a5a2.3bb0df0a.47892.d6ec@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92986:351273f6cab2 Date: 2017-11-09 19:58 +0200 http://bitbucket.org/pypy/pypy/changeset/351273f6cab2/ Log: call get_ident to register thread_ident in pypy_threadlocal_s diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -100,6 +100,8 @@ ENTRY_POINT_ARGS = (int, float) def entry_point(self, value, delta_t): + from rpython.rlib.rthread import get_ident + get_ident() # register thread_ident for win32 code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) From pypy.commits at gmail.com Thu Nov 9 15:20:40 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 12:20:40 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Merge default Message-ID: <5a04b898.14a1df0a.ca2b9.3f7f@mx.google.com> Author: stian Branch: math-improvements Changeset: r92987:92d38b4c73a2 Date: 2017-11-09 19:16 +0100 http://bitbucket.org/pypy/pypy/changeset/92d38b4c73a2/ Log: Merge default diff too long, truncating to 2000 out of 6320 lines diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at 
given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, 
len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % 
(val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + 
def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. 
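The _ctypes hunks above (lib_pypy/_ctypes/pointer.py and structure.py) change what pointer item assignment does for composite values: instead of writing a single word taken from the source buffer, the target address is computed as base + index * sizeof(type) and the object's full contents are copied there via _copy_to()/memmove. A minimal sketch of the user-visible behaviour this is meant to support, written against the public ctypes API only; the Point/arr/p names are illustrative and not part of the patch:

    from ctypes import Structure, POINTER, cast, c_int

    class Point(Structure):
        _fields_ = [("x", c_int), ("y", c_int)]

    arr = (Point * 2)()                # backing storage for two Points
    p = cast(arr, POINTER(Point))      # pointer to the first element
    p[1] = Point(3, 4)                 # assignment must copy both fields,
                                       # not just the first machine word
    assert (arr[1].x, arr[1].y) == (3, 4)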
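Similarly, the _sqlite3 hunk above reworks statement-type detection so the first whitespace-delimited keyword is only examined when the SQL text is non-empty, and any unrecognised keyword falls through to the "other" type. A rough standalone sketch of that first-keyword dispatch; the names below are placeholders rather than the module's actual _STMT_TYPE_* constants, and whitespace-only input is folded into the invalid case here for simplicity:

    # Placeholder mapping; the real module stores integer _STMT_TYPE_* values.
    _KEYWORD_TO_TYPE = {
        "SELECT": "select", "INSERT": "insert", "UPDATE": "update",
        "DELETE": "delete", "REPLACE": "replace",
    }

    def classify_statement(sql):
        words = sql.lstrip().split()
        if not words:                  # empty (or whitespace-only) SQL
            return "invalid"
        return _KEYWORD_TO_TYPE.get(words[0].upper(), "other")

    assert classify_statement("SELECT * FROM t") == "select"
    assert classify_statement("") == "invalid"
    assert classify_statement("PRAGMA foo") == "other"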
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,13 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. 
branch: run-extra-tests +Run extra_tests/ in buildbot diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', 
'_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. 
-def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if 
capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = 
capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import 
FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - 
self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of 
converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - 
do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, From pypy.commits at gmail.com Thu Nov 9 15:20:42 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 12:20:42 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Fix translation Message-ID: <5a04b89a.099fdf0a.c3df7.3259@mx.google.com> Author: stian Branch: math-improvements Changeset: r92988:c961b6f6e3c6 Date: 2017-11-09 21:20 +0100 http://bitbucket.org/pypy/pypy/changeset/c961b6f6e3c6/ Log: Fix translation diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -167,6 +167,7 @@ def __ne__(self, other): return not (self == other) + @specialize.argtype(1) def digit(self, x): """Return the x'th digit, as an int.""" return self._digits[x] @@ -212,13 +213,15 @@ if intval < 0: sign = -1 ival = -r_uint(intval) + carry = ival >> SHIFT elif intval > 0: sign = 1 ival = r_uint(intval) + carry = 0 else: return NULLRBIGINT - carry = ival >> SHIFT + if carry: return rbigint([_store_digit(ival & MASK), _store_digit(carry)], sign, 2) @@ -851,17 +854,17 @@ size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: - rem = self.widedigit(size) + wrem = self.widedigit(size) while size > 0: size -= 1 - rem = ((rem << SHIFT) | self.digit(size)) % digit - + wrem = ((wrem << SHIFT) | self.digit(size)) % digit + rem = _store_digit(wrem) else: - rem = self.widedigit(0) % digit + rem = _store_digit(self.digit(0) % digit) if rem == 0: return NULLRBIGINT - mod = rbigint([_store_digit(rem)], -1 if self.sign < 0 else 1, 1) + mod = rbigint([rem], -1 if self.sign < 0 else 1, 1) else: div, mod = 
_divrem(self, other) if mod.sign * other.sign == -1: @@ -893,16 +896,17 @@ size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: - rem = self.widedigit(size) + wrem = self.widedigit(size) while size > 0: size -= 1 - rem = ((rem << SHIFT) | self.digit(size)) % digit + wrem = ((wrem << SHIFT) | self.digit(size)) % digit + rem = _store_digit(wrem) else: - rem = self.digit(0) % digit + rem = _store_digit(self.digit(0) % digit) if rem == 0: return NULLRBIGINT - mod = rbigint([_store_digit(rem)], -1 if self.sign < 0 else 1, 1) + mod = rbigint([rem], -1 if self.sign < 0 else 1, 1) else: raise ZeroDivisionError("long division or modulo by zero") From pypy.commits at gmail.com Thu Nov 9 17:22:17 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 14:22:17 -0800 (PST) Subject: [pypy-commit] pypy default: remove more maemo code Message-ID: <5a04d519.cc8ddf0a.830c6.3c72@mx.google.com> Author: Matti Picus Branch: Changeset: r92989:7b112966cdd7 Date: 2017-11-10 00:21 +0200 http://bitbucket.org/pypy/pypy/changeset/7b112966cdd7/ Log: remove more maemo code diff --git a/rpython/translator/platform/test/test_posix.py b/rpython/translator/platform/test/test_posix.py --- a/rpython/translator/platform/test/test_posix.py +++ b/rpython/translator/platform/test/test_posix.py @@ -64,10 +64,3 @@ assert 'INCLUDEDIRS = %s/foo/baz/include' % include_prefix in Makefile assert 'LIBDIRS = %s/foo/baz/lib' % lib_prefix in Makefile -class TestMaemo(TestMakefile): - strict_on_stderr = False - - def setup_class(cls): - from rpython.translator.platform.maemo import check_scratchbox, Maemo - check_scratchbox() - cls.platform = Maemo() From pypy.commits at gmail.com Sat Nov 11 10:51:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 11 Nov 2017 07:51:43 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix translation Message-ID: <5a071c8f.57b9df0a.291a8.22f6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92990:92c6fb568fa1 Date: 2017-11-11 15:51 +0000 http://bitbucket.org/pypy/pypy/changeset/92c6fb568fa1/ Log: fix translation diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -977,6 +977,7 @@ w_type.setdictvalue(space, '__doc__', w_value) def type_get_txtsig(space, w_type): + w_type = _check(space, w_type) if w_type.text_signature is None: return space.w_None return space.newtext(w_type.text_signature) From pypy.commits at gmail.com Sat Nov 11 16:06:26 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 11 Nov 2017 13:06:26 -0800 (PST) Subject: [pypy-commit] pypy default: add a hint Message-ID: <5a076652.44841c0a.152ca.d548@mx.google.com> Author: fijal Branch: Changeset: r92991:e5bfccc9fd98 Date: 2017-11-11 16:05 -0500 http://bitbucket.org/pypy/pypy/changeset/e5bfccc9fd98/ Log: add a hint diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -385,6 +385,7 @@ @specialize.argtype(1) def _inplace_add(self, other): + resizelist_hint(self._data, len(self._data) + len(other)) for i in range(len(other)): self._data.append(other[i]) From pypy.commits at gmail.com Sat Nov 11 23:29:38 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 11 Nov 2017 20:29:38 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove some unneddecary use of widedigit in _x_mul Message-ID: <5a07ce32.c39cdf0a.b72ee.96a2@mx.google.com> Author: stian Branch: math-improvements 
Changeset: r92992:985fb3488ff0 Date: 2017-11-12 05:28 +0100 http://bitbucket.org/pypy/pypy/changeset/985fb3488ff0/ Log: Remove some unneddecary use of widedigit in _x_mul diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1753,12 +1753,12 @@ pz += 1 carry >>= SHIFT if carry: - carry += z.uwidedigit(pz) + carry += z.udigit(pz) z.setdigit(pz, carry) pz += 1 carry >>= SHIFT if carry: - z.setdigit(pz, z.uwidedigit(pz) + carry) + z.setdigit(pz, z.udigit(pz) + carry) assert (carry >> SHIFT) == 0 i += 1 z._normalize() @@ -1822,7 +1822,7 @@ pz += 1 carry >>= SHIFT if carry: - z.setdigit(pz, z.uwidedigit(pz) + carry) + z.setdigit(pz, z.udigit(pz) + carry) z._normalize() return z From pypy.commits at gmail.com Sun Nov 12 02:41:43 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 11 Nov 2017 23:41:43 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Make inplace_divmod unsigned, this makes for a ~20% speed up in long / single digit Message-ID: <5a07fb37.131f1c0a.a8ee2.41fb@mx.google.com> Author: stian Branch: math-improvements Changeset: r92993:3c7a6c85f39c Date: 2017-11-12 08:40 +0100 http://bitbucket.org/pypy/pypy/changeset/3c7a6c85f39c/ Log: Make inplace_divmod unsigned, this makes for a ~20% speed up in long / single digit diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -718,7 +718,7 @@ elif a._digits[0] == ONEDIGIT: return rbigint(b._digits[:bsize], a.sign * b.sign, bsize) elif bsize == 1: - res = b.uwidedigit(0) * a.uwidedigit(0) + res = b.uwidedigit(0) * a.udigit(0) carry = res >> SHIFT if carry: return rbigint([_store_digit(res & MASK), _store_digit(carry)], a.sign * b.sign, 2) @@ -1949,13 +1949,13 @@ Divide bigint pin by non-zero digit n, storing quotient in pout, and returning the remainder. It's OK for pin == pout on entry. """ - rem = _widen_digit(0) + rem = _unsigned_widen_digit(0) assert n > 0 and n <= MASK if not size: size = pin.numdigits() size -= 1 while size >= 0: - rem = (rem << SHIFT) | pin.digit(size) + rem = (rem << SHIFT) | pin.udigit(size) hi = rem // n pout.setdigit(size, hi) rem -= hi * n From pypy.commits at gmail.com Sun Nov 12 04:36:38 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 01:36:38 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Provide two assets to make better code in long multidigit division Message-ID: <5a081626.26afdf0a.bdbf5.9cbe@mx.google.com> Author: stian Branch: math-improvements Changeset: r92994:22373c826010 Date: 2017-11-12 10:29 +0100 http://bitbucket.org/pypy/pypy/changeset/22373c826010/ Log: Provide two assets to make better code in long multidigit division diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2130,8 +2130,11 @@ vtop = 0 else: vtop = v.widedigit(j) << SHIFT - #assert vtop <= wm1 + vv = vtop | v.digit(abs(j-1)) + # These two hints to make division just as fast as doing it unsigned. + assert vv >= 0 + assert wm1 >= 1 q = vv / wm1 r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. vj2 = v.digit(abs(j-2)) From pypy.commits at gmail.com Sun Nov 12 04:36:40 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 01:36:40 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Tweak comment about why we don't do it unsigned. 
Message-ID: <5a081628.480f1c0a.a8b02.89ac@mx.google.com> Author: stian Branch: math-improvements Changeset: r92995:f09288ca6bf9 Date: 2017-11-12 10:36 +0100 http://bitbucket.org/pypy/pypy/changeset/f09288ca6bf9/ Log: Tweak comment about why we don't do it unsigned. diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2117,7 +2117,7 @@ wm1 = w.widedigit(abs(size_w-1)) wm2 = w.widedigit(abs(size_w-2)) - + j = size_v - 1 k -= 1 while k >= 0: @@ -2132,7 +2132,7 @@ vtop = v.widedigit(j) << SHIFT vv = vtop | v.digit(abs(j-1)) - # These two hints to make division just as fast as doing it unsigned. + # Hints to make division just as fast as doing it unsigned. But avoids casting to get correct results. assert vv >= 0 assert wm1 >= 1 q = vv / wm1 From pypy.commits at gmail.com Sun Nov 12 09:31:49 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 12 Nov 2017 06:31:49 -0800 (PST) Subject: [pypy-commit] pypy default: Issue #2699: test and fixes. Message-ID: <5a085b55.178fdf0a.93bfd.bea7@mx.google.com> Author: Armin Rigo Branch: Changeset: r92996:bc4acc4caa28 Date: 2017-11-12 15:30 +0100 http://bitbucket.org/pypy/pypy/changeset/bc4acc4caa28/ Log: Issue #2699: test and fixes. Note that this includes a fix to the stdlib warnings.py, otherwise non-ascii warning messages are usually swallowed. That's a bug in CPython, I think. diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -201,9 +201,20 @@ w_stderr = space.sys.get("stderr") # Print "filename:lineno: category: text\n" - message = "%s:%d: %s: %s\n" % (space.text_w(w_filename), lineno, - space.text_w(w_name), space.text_w(w_text)) - space.call_method(w_stderr, "write", space.newtext(message)) + try: + message = "%s:%d: %s: %s\n" % (space.text_w(w_filename), lineno, + space.text_w(w_name), + space.text_w(w_text)) + except OperationError as e: + if e.async(space): + raise + message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, + space.unicode_w(w_name), + space.unicode_w(w_text)) + w_message = space.newunicode(message) + else: + w_message = space.newtext(message) + space.call_method(w_stderr, "write", w_message) # Print " source_line\n" if not w_sourceline: @@ -248,7 +259,7 @@ if space.isinstance_w(w_message, space.w_Warning): w_text = space.str(w_message) w_category = space.type(w_message) - elif (not space.isinstance_w(w_message, space.w_unicode) or + elif (not space.isinstance_w(w_message, space.w_unicode) and not space.isinstance_w(w_message, space.w_bytes)): w_text = space.str(w_message) w_message = space.call_function(w_category, w_message) diff --git a/pypy/module/_warnings/test/test_warnings.py b/pypy/module/_warnings/test/test_warnings.py --- a/pypy/module/_warnings/test/test_warnings.py +++ b/pypy/module/_warnings/test/test_warnings.py @@ -65,3 +65,23 @@ _warnings.warn('test', 
UserWarning) globals()['__file__'] = None _warnings.warn('test', UserWarning) + + def test_warn_unicode(self): + import _warnings, sys + old = sys.stderr + try: + class Grab: + def write(self, u): + self.data.append(u) + sys.stderr = Grab() + sys.stderr.data = data = [] + _warnings.warn_explicit("9238exbexn8", Warning, + "", 1421, module_globals=globals()) + assert isinstance(''.join(data), str) + _warnings.warn_explicit(u"\u1234\u5678", UserWarning, + "", 831, module_globals=globals()) + assert isinstance(''.join(data), unicode) + assert ''.join(data).endswith( + u':831: UserWarning: \u1234\u5678\n') + finally: + sys.stderr = old From pypy.commits at gmail.com Sun Nov 12 15:14:07 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 12:14:07 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Fix ulllong division OP in rtyper Message-ID: <5a08ab8f.1cbf1c0a.e0517.e336@mx.google.com> Author: stian Branch: math-improvements Changeset: r92997:8b41193b43b2 Date: 2017-11-12 21:10 +0100 http://bitbucket.org/pypy/pypy/changeset/8b41193b43b2/ Log: Fix ulllong division OP in rtyper diff --git a/rpython/rtyper/rint.py b/rpython/rtyper/rint.py --- a/rpython/rtyper/rint.py +++ b/rpython/rtyper/rint.py @@ -476,7 +476,7 @@ @jit.dont_look_inside def ll_ulllong_py_div(x, y): - return llop.ullong_floordiv(UnsignedLongLongLong, x, y) + return llop.ulllong_floordiv(UnsignedLongLongLong, x, y) def ll_ulllong_py_div_zer(x, y): if y == 0: From pypy.commits at gmail.com Sun Nov 12 17:16:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:16:24 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement __text_signature__ on PyCFunctions Message-ID: <5a08c838.21b9df0a.93d86.5c2e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92998:a626dd21b1fa Date: 2017-11-12 20:11 +0000 http://bitbucket.org/pypy/pypy/changeset/a626dd21b1fa/ Log: Implement __text_signature__ on PyCFunctions diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -43,6 +43,39 @@ from pypy.module.cpyext.object import _dealloc _dealloc(space, py_obj) +def undotted_name(name): + """Return the last component of a dotted name""" + dotpos = name.rfind('.') + if dotpos < 0: + return name + else: + return name[dotpos + 1:] + +SIGNATURE_MARKER = ')\n--\n\n' + +def extract_doc(raw_doc, name): + doc = raw_doc + name = undotted_name(name) + if raw_doc.startswith(name + '('): + end_sig = raw_doc.find(SIGNATURE_MARKER) + if end_sig > 0: + doc = raw_doc[end_sig + len(SIGNATURE_MARKER):] + if not doc: + return None + return doc + +def extract_txtsig(raw_doc, name): + name = undotted_name(name) + if raw_doc.startswith(name + '('): + end_sig = raw_doc.find(SIGNATURE_MARKER) + if end_sig > 0: + # Notes: + # * Parentheses are included + # * SIGNATURE_MARKER cannot appear inside name, + # so end_sig > len(name) + return raw_doc[len(name): end_sig + 1] + return None + class W_PyCFunctionObject(W_Root): # TODO create a slightly different class depending on the c_ml_flags def __init__(self, space, ml, w_self, w_module=None): @@ -84,11 +117,22 @@ raise oefmt(space.w_RuntimeError, "unknown calling convention") def get_doc(self, space): - doc = self.ml.c_ml_doc - if doc: - return space.newtext(rffi.charp2str(rffi.cast(rffi.CCHARP,doc))) - else: - return space.w_None + c_doc = self.ml.c_ml_doc + if c_doc: + rawdoc = rffi.charp2str(rffi.cast(rffi.CCHARP, c_doc)) + doc = extract_doc(rawdoc, self.name) + if doc 
is not None: + return space.newtext(doc) + return space.w_None + + def get_txtsig(self, space): + c_doc = self.ml.c_ml_doc + if c_doc: + rawdoc = rffi.charp2str(rffi.cast(rffi.CCHARP, c_doc)) + txtsig = extract_txtsig(rawdoc, self.name) + if txtsig is not None: + return space.newtext(txtsig) + return space.w_None class W_PyCFunctionObjectNoArgs(W_PyCFunctionObject): def call(self, space, w_self, w_args, w_kw): @@ -289,6 +333,7 @@ 'builtin_function_or_method', __call__ = interp2app(cfunction_descr_call), __doc__ = GetSetProperty(W_PyCFunctionObject.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObject.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObject), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObject, wrapfn="newtext_or_none"), @@ -299,6 +344,7 @@ 'builtin_function_or_method', W_PyCFunctionObject.typedef, __call__ = interp2app(cfunction_descr_call_noargs), __doc__ = GetSetProperty(W_PyCFunctionObjectNoArgs.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObjectNoArgs.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObjectNoArgs), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObjectNoArgs, wrapfn="newtext_or_none"), @@ -309,6 +355,7 @@ 'builtin_function_or_method', W_PyCFunctionObject.typedef, __call__ = interp2app(cfunction_descr_call_single_object), __doc__ = GetSetProperty(W_PyCFunctionObjectSingleObject.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObjectSingleObject.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObjectSingleObject), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObjectSingleObject, wrapfn="newtext_or_none"), diff --git a/pypy/module/cpyext/test/docstrings.c b/pypy/module/cpyext/test/docstrings.c new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/docstrings.c @@ -0,0 +1,149 @@ +#include "Python.h" + +static PyObject * +test_with_docstring(PyObject *self) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(empty_doc, +"" +); + +PyDoc_STRVAR(no_sig, +"This docstring has no signature." +); + +PyDoc_STRVAR(invalid_sig, +"invalid_sig($module, /, boo)\n" +"\n" +"This docstring has an invalid signature." +); + +PyDoc_STRVAR(invalid_sig2, +"invalid_sig2($module, /, boo)\n" +"\n" +"--\n" +"\n" +"This docstring also has an invalid signature." +); + +PyDoc_STRVAR(with_sig, +"with_sig($module, /, sig)\n" +"--\n" +"\n" +"This docstring has a valid signature." +); + +PyDoc_STRVAR(with_sig_but_no_doc, +"with_sig_but_no_doc($module, /, sig)\n" +"--\n" +"\n" +); + +PyDoc_STRVAR(with_signature_and_extra_newlines, +"with_signature_and_extra_newlines($module, /, parameter)\n" +"--\n" +"\n" +"\n" +"This docstring has a valid signature and some extra newlines." 
+); + + +static PyMethodDef methods[] = { + {"no_doc", + (PyCFunction)test_with_docstring, METH_NOARGS}, + {"empty_doc", + (PyCFunction)test_with_docstring, METH_NOARGS, + empty_doc}, + {"no_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + no_sig}, + {"invalid_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + invalid_sig}, + {"invalid_sig2", + (PyCFunction)test_with_docstring, METH_NOARGS, + invalid_sig2}, + {"with_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_sig}, + {"with_sig_but_no_doc", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_sig_but_no_doc}, + {"with_signature_and_extra_newlines", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_signature_and_extra_newlines}, + {NULL, NULL} /* sentinel */ +}; + + +static PyType_Slot HeapType_slots[] = { + {Py_tp_doc, "HeapType()\n--\n\nA type with a signature"}, + {0, 0}, +}; + +static PyType_Spec HeapType_spec = { + "docstrings.HeapType", + sizeof(PyObject), + 0, + Py_TPFLAGS_DEFAULT, + HeapType_slots +}; + +static PyTypeObject SomeType = { + PyVarObject_HEAD_INIT(NULL, 0) + "docstrings.SomeType", /* tp_name */ + sizeof(PyObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "SomeType()\n--\n\nA type with a signature", /* tp_doc */ +}; + + +static struct PyModuleDef def = { + PyModuleDef_HEAD_INIT, + "docstrings", + NULL, + -1, + methods, + NULL, + NULL, + NULL, + NULL +}; + + +PyMODINIT_FUNC +PyInit_docstrings(void) +{ + PyObject *m, *tmp; + m = PyModule_Create(&def); + if (m == NULL) + return NULL; + tmp = PyType_FromSpec(&HeapType_spec); + if (tmp == NULL) + return NULL; + if (PyModule_AddObject(m, "HeapType", tmp) != 0) + return NULL; + if (PyType_Ready(&SomeType) < 0) + return NULL; + if (PyModule_AddObject(m, "SomeType", (PyObject*)&SomeType) != 0) + return NULL; + return m; +} diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -100,3 +100,23 @@ assert mod.check(A) == 0 assert mod.check(A.meth) == 0 assert mod.check(A.stat) == 0 + + def test_text_signature(self): + mod = self.import_module('docstrings') + assert mod.no_doc.__doc__ is None + assert mod.no_doc.__text_signature__ is None + assert mod.empty_doc.__doc__ is None + assert mod.empty_doc.__text_signature__ is None + assert mod.no_sig.__doc__ + assert mod.no_sig.__text_signature__ is None + assert mod.invalid_sig.__doc__ + assert mod.invalid_sig.__text_signature__ is None + assert mod.invalid_sig2.__doc__ + assert mod.invalid_sig2.__text_signature__ is None + assert mod.with_sig.__doc__ + assert mod.with_sig.__text_signature__ == '($module, /, sig)' + assert mod.with_sig_but_no_doc.__doc__ is None + assert mod.with_sig_but_no_doc.__text_signature__ == '($module, /, sig)' + assert mod.with_signature_and_extra_newlines.__doc__ + assert (mod.with_signature_and_extra_newlines.__text_signature__ == + '($module, /, parameter)') From pypy.commits at gmail.com Sun Nov 12 17:16:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:16:26 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement 
__text_signature__ on C-defined types Message-ID: <5a08c83a.51a9df0a.44f05.09e5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92999:a6bc26a09fc3 Date: 2017-11-12 22:15 +0000 http://bitbucket.org/pypy/pypy/changeset/a6bc26a09fc3/ Log: Implement __text_signature__ on C-defined types diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -453,6 +453,16 @@ p = property(lambda: "never used", pset, pdel) assert module.tp_descr_set(p) is True + def test_text_signature(self): + module = self.import_module(name='docstrings') + assert module.SomeType.__text_signature__ == '()' + assert module.SomeType.__doc__ == 'A type with a signature' + if '__pypy__' in sys.modules: + assert module.HeapType.__text_signature__ == '()' + else: # XXX: bug in CPython? + assert module.HeapType.__text_signature__ is None + assert module.HeapType.__doc__ == 'A type with a signature' + class TestTypes(BaseApiTest): def test_type_attributes(self, space, api): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -23,7 +23,7 @@ from pypy.module.cpyext.cparser import CTypeSpace from pypy.module.cpyext.methodobject import (W_PyCClassMethodObject, W_PyCWrapperObject, PyCFunction_NewEx, PyCFunction, PyMethodDef, - W_PyCMethodObject, W_PyCFunctionObject) + W_PyCMethodObject, W_PyCFunctionObject, extract_doc, extract_txtsig) from pypy.module.cpyext.modsupport import convert_method_defs from pypy.module.cpyext.pyobject import ( PyObject, make_ref, from_ref, get_typedescr, make_typedescr, @@ -331,7 +331,7 @@ if not getattr(struct, slot_names[1]): setattr(struct, slot_names[1], slot_func_helper) -def add_operators(space, dict_w, pto): +def add_operators(space, dict_w, pto, name): from pypy.module.cpyext.object import PyObject_HashNotImplemented hash_not_impl = PyObject_HashNotImplemented.api_func.get_llhelper(space) for method_name, slot_names, wrapper_func, wrapper_func_kwds, doc in slotdefs_for_wrappers: @@ -361,8 +361,8 @@ wrapper_func_kwds, doc, func_voidp, offset=offset) dict_w[method_name] = w_obj if pto.c_tp_doc: - dict_w['__doc__'] = space.newtext( - rffi.charp2str(cts.cast('char*', pto.c_tp_doc))) + raw_doc = rffi.charp2str(cts.cast('char*', pto.c_tp_doc)) + dict_w['__doc__'] = space.newtext(extract_doc(raw_doc, name)) if pto.c_tp_new: add_tp_new_wrapper(space, dict_w, pto) @@ -504,12 +504,12 @@ bases_w = space.fixedview(from_ref(space, pto.c_tp_bases)) dict_w = {} - add_operators(space, dict_w, pto) + name = rffi.charp2str(cts.cast('char*', pto.c_tp_name)) + add_operators(space, dict_w, pto, name) convert_method_defs(space, dict_w, pto.c_tp_methods, self) convert_getset_defs(space, dict_w, pto.c_tp_getset, self) convert_member_defs(space, dict_w, pto.c_tp_members, self) - name = rffi.charp2str(cts.cast('char*', pto.c_tp_name)) flag_heaptype = pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE if flag_heaptype: minsize = rffi.sizeof(PyHeapTypeObject.TO) @@ -527,8 +527,9 @@ elif pto.c_tp_as_mapping and pto.c_tp_as_mapping.c_mp_subscript: self.flag_map_or_seq = 'M' if pto.c_tp_doc: - self.w_doc = space.newtext( - rffi.charp2str(cts.cast('char*', pto.c_tp_doc))) + rawdoc = rffi.charp2str(cts.cast('char*', pto.c_tp_doc)) + self.w_doc = space.newtext_or_none(extract_doc(rawdoc, name)) + self.text_signature = extract_txtsig(rawdoc, name) @bootstrap_function def 
init_typeobject(space): From pypy.commits at gmail.com Sun Nov 12 17:18:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:18:59 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Enable __text_signature__ tests in CPython test suite Message-ID: <5a08c8d3.84b5df0a.b013e.a97a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93000:49df4f50208f Date: 2017-11-12 22:18 +0000 http://bitbucket.org/pypy/pypy/changeset/49df4f50208f/ Log: Enable __text_signature__ tests in CPython test suite diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -769,7 +769,6 @@ kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}, formatted='(path, *, dir_fd=None, follow_symlinks=True)') - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func(self): @@ -778,7 +777,6 @@ spec = inspect.getfullargspec(builtin) self.assertEqual(spec.defaults[0], 'avocado') - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func_no_signature(self): @@ -1959,7 +1957,6 @@ ('kwargs', ..., int, "var_keyword")), ...)) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtins(self): @@ -2033,7 +2030,6 @@ # Regression test for issue #20586 test_callable(_testcapi.docstring_with_signature_but_no_doc) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_decorated_builtins(self): @@ -2056,7 +2052,6 @@ follow_wrapped=False), inspect.signature(wrapper_like)) - @cpython_only def test_signature_on_builtins_no_signature(self): import _testcapi with self.assertRaisesRegex(ValueError, @@ -3417,7 +3412,6 @@ # This test case provides a home for checking that particular APIs # have signatures available for introspection - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_builtins_have_signatures(self): From pypy.commits at gmail.com Mon Nov 13 06:14:33 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 13 Nov 2017 03:14:33 -0800 (PST) Subject: [pypy-commit] pypy default: fix issue #2701 Message-ID: <5a097e99.54d91c0a.b9257.b4f8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93001:b95f1240ad90 Date: 2017-11-13 12:13 +0100 http://bitbucket.org/pypy/pypy/changeset/b95f1240ad90/ Log: fix issue #2701 allow the sequences future-import, docstring, future-import for CPython bug-compatibility diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED From pypy.commits at gmail.com Mon Nov 13 07:53:58 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 13 Nov 2017 04:53:58 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove invert logic from rqshift (it is only used with positive numbers) Message-ID: <5a0995e6.45aa1c0a.8fd97.8e7f@mx.google.com> Author: stian Branch: math-improvements Changeset: r93002:3f4aca709e49 Date: 2017-11-13 13:53 +0100 http://bitbucket.org/pypy/pypy/changeset/3f4aca709e49/ Log: Remove invert logic from rqshift (it is only used with positive numbers) diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1314,38 +1314,22 @@ wordshift = int_other / SHIFT loshift = int_other % SHIFT newsize = self.numdigits() - wordshift - - invert = False - if self.sign == -1: - first = self.digit(0) - if first == 0: - a = self.invert().rqshift(int_other) - return a.invert() - invert = True if newsize <= 0: - if invert: - return ONENEGATIVERBIGINT - else: - return NULLRBIGINT + return NULLRBIGINT - hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 while i < newsize: digit = self.udigit(wordshift) - if invert and i == 0 and wordshift == 0: - digit -= 1 newdigit = (digit >> loshift) if i+1 < newsize: newdigit |= (self.udigit(wordshift+1) << hishift) z.setdigit(i, newdigit) i += 1 - wordshift += 1 - if invert: - z.setdigit(0, z.digit(0)+1) + wordshift += 1 z._normalize() return z rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -609,21 +609,18 @@ res1 = f1.lqshift(z).tolong() res2 = f1.rqshift(z).tolong() res3 = nf1.lqshift(z).tolong() - res4 = nf1.rqshift(z).tolong() + assert res1 == num << z assert res2 == num >> z assert res3 == -num << z - assert res4 == -num >> z - # Large digit, also invertion test. 
+ + # Large digit for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): f1 = rbigint.fromlong(x) - nf1 = rbigint.fromlong(-x) assert f1.rqshift(SHIFT).tolong() == x >> SHIFT - assert nf1.rqshift(SHIFT).tolong() == -x >> SHIFT assert f1.rqshift(SHIFT+1).tolong() == x >> (SHIFT+1) - assert nf1.rqshift(SHIFT+1).tolong() == -x >> (SHIFT+1) def test_from_list_n_bits(self): for x in ([3L ** 30L, 5L ** 20L, 7 ** 300] + From pypy.commits at gmail.com Mon Nov 13 11:55:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 08:55:51 -0800 (PST) Subject: [pypy-commit] pypy py3.5: backout c7e665a4d094: this hack isn't needed any more Message-ID: <5a09ce97.b796df0a.f26c0.2e6b@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93003:e73ed06e7955 Date: 2017-11-13 16:53 +0000 http://bitbucket.org/pypy/pypy/changeset/e73ed06e7955/ Log: backout c7e665a4d094: this hack isn't needed any more diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -2078,8 +2078,6 @@ s = getattr(func, "__text_signature__", None) if not s: - if func is object: # XXX PyPy hack until we support __text_signature__ - return '()' # in the same cases as CPython raise ValueError("no signature found for builtin {!r}".format(func)) return _signature_fromstr(cls, func, s, skip_bound_arg) From pypy.commits at gmail.com Mon Nov 13 13:15:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 10:15:03 -0800 (PST) Subject: [pypy-commit] pypy default: Test an obscure difference between C-defined and Python-defined functions Message-ID: <5a09e127.0ec6df0a.73939.4fa3@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93004:3b8c612bb506 Date: 2017-11-13 18:14 +0000 http://bitbucket.org/pypy/pypy/changeset/3b8c612bb506/ Log: Test an obscure difference between C-defined and Python-defined functions diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -93,6 +93,22 @@ assert mod.isSameFunction(mod.getarg_O) raises(SystemError, mod.isSameFunction, 1) + def test_function_as_method(self): + # Unlike user functions, builtins don't become methods + mod = self.import_extension('foo', [ + ('f', 'METH_NOARGS', + ''' + return PyLong_FromLong(42); + '''), + ]) + class A(object): pass + A.f = mod.f + A.g = lambda: 42 + assert A.f() == 42 + raises(TypeError, A.g) + assert A().f() == 42 + raises(TypeError, A().g) + def test_check(self): mod = self.import_extension('foo', [ ('check', 'METH_O', @@ -116,4 +132,3 @@ assert mod.check(A) == 0 assert mod.check(A.meth) == 0 assert mod.check(A.stat) == 0 - From pypy.commits at gmail.com Mon Nov 13 15:31:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 12:31:00 -0800 (PST) Subject: [pypy-commit] pypy default: Implement cpyext.is_cpyext_function() Message-ID: <5a0a0104.cc1d1c0a.1ad51.7381@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93005:a305590465d6 Date: 2017-11-13 20:30 +0000 http://bitbucket.org/pypy/pypy/changeset/a305590465d6/ Log: Implement cpyext.is_cpyext_function() inspect.isbuiltin() now returns True for functions implemented in C, like on CPython. 
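A rough usage sketch of the new predicate (not part of the changeset below); "mymod" stands in for any compiled cpyext extension module that exposes a C-level function f:

    import inspect
    try:
        # added by this changeset; only exists on PyPy
        from cpyext import is_cpyext_function
    except ImportError:
        # same fallback as the lib-python/2.7/inspect.py patch below
        is_cpyext_function = lambda obj: False

    import mymod                      # hypothetical cpyext extension module

    def g():
        return 42

    # C-implemented function: detected either by the new helper (PyPy) or
    # by the unpatched isbuiltin() (CPython)
    assert is_cpyext_function(mymod.f) or inspect.isbuiltin(mymod.f)
    # an ordinary Python function is neither
    assert not is_cpyext_function(g) and not inspect.isbuiltin(g)

On a PyPy with this change applied, inspect.isbuiltin(mymod.f) itself also returns True, since the patched inspect.py ORs the new predicate into its isinstance check.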
diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py --- a/pypy/module/cpyext/__init__.py +++ b/pypy/module/cpyext/__init__.py @@ -5,6 +5,7 @@ class Module(MixedModule): interpleveldefs = { 'load_module': 'api.load_extension_module', + 'is_cpyext_function': 'interp_cpyext.is_cpyext_function', } appleveldefs = { diff --git a/pypy/module/cpyext/interp_cpyext.py b/pypy/module/cpyext/interp_cpyext.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/interp_cpyext.py @@ -0,0 +1,4 @@ +from .methodobject import W_PyCFunctionObject + +def is_cpyext_function(space, w_arg): + return space.newbool(isinstance(w_arg, W_PyCFunctionObject)) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -381,6 +381,11 @@ def test_export_function(self): import sys + if '__pypy__' in sys.modules: + from cpyext import is_cpyext_function + else: + import inspect + is_cpyext_function = inspect.isbuiltin init = """ if (Py_IsInitialized()) Py_InitModule("foo", methods); @@ -399,6 +404,7 @@ assert 'foo' in sys.modules assert 'return_pi' in dir(module) assert module.return_pi is not None + assert is_cpyext_function(module.return_pi) assert module.return_pi() == 3.14 assert module.return_pi.__module__ == 'foo' @@ -777,14 +783,14 @@ # Set an exception and return NULL raises(TypeError, module.set, None) - # clear any exception and return a value + # clear any exception and return a value assert module.clear(1) == 1 # Set an exception, but return non-NULL expected = 'An exception was set, but function returned a value' exc = raises(SystemError, module.set, 1) assert exc.value[0] == expected - + # Clear the exception and return a value, all is OK assert module.clear(1) == 1 diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -104,8 +104,10 @@ class A(object): pass A.f = mod.f A.g = lambda: 42 + # Unbound method assert A.f() == 42 raises(TypeError, A.g) + # Bound method assert A().f() == 42 raises(TypeError, A().g) From pypy.commits at gmail.com Mon Nov 13 16:07:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:07:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a0a09a8.17361c0a.4db14.2372@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93006:19326fb34a67 Date: 2017-11-13 21:07 +0000 http://bitbucket.org/pypy/pypy/changeset/19326fb34a67/ Log: hg merge default diff --git 
a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py --- a/extra_tests/test_bytes.py +++ b/extra_tests/test_bytes.py @@ -1,25 +1,27 @@ from hypothesis import strategies as st from hypothesis import given, example - at given(st.binary(), st.binary(), st.binary()) +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) def test_find(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.find(u) <= len(prefix) assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_index(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.index(u) <= len(prefix) assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rfind(u, prefix, suffix): s = prefix + u + suffix assert s.rfind(u) >= len(prefix) assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rindex(u, prefix, suffix): s = prefix + u + suffix assert s.rindex(u) >= len(prefix) @@ -34,20 +36,20 @@ start = max(start + len(u), 0) return start, end - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_startswith_basic(u, v): assert u.startswith(v) is (u[:len(v)] == v) @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_startswith_start(u, v, start): expected = u[start:].startswith(v) if v else (start <= len(u)) assert u.startswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_startswith_3(u, v, start, end): if v: expected = u[start:end].startswith(v) @@ -56,7 +58,7 @@ expected = start0 <= len(u) and start0 <= end0 assert u.startswith(v, start, end) is expected - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_endswith_basic(u, v): if len(v) > len(u): assert u.endswith(v) is False @@ -65,14 +67,14 @@ @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_endswith_2(u, v, start): expected = u[start:].endswith(v) if v else (start <= len(u)) assert u.endswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_endswith_3(u, v, start, end): if v: expected = u[start:end].endswith(v) diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. 
CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -87,13 +87,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -193,3 +193,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == 0 + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == 0 diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -292,7 +292,7 @@ if space.isinstance_w(w_message, space.w_Warning): w_text = space.str(w_message) w_category = space.type(w_message) - elif (not space.isinstance_w(w_message, space.w_unicode) or + elif (not space.isinstance_w(w_message, space.w_unicode) and not space.isinstance_w(w_message, space.w_bytes)): w_text = space.str(w_message) w_message = space.call_function(w_category, w_message) diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py --- a/pypy/module/cpyext/__init__.py +++ b/pypy/module/cpyext/__init__.py @@ -4,6 +4,7 @@ class Module(MixedModule): interpleveldefs = { + 'is_cpyext_function': 'interp_cpyext.is_cpyext_function', } appleveldefs = { @@ -41,7 +42,6 @@ import pypy.module.cpyext.pyerrors import pypy.module.cpyext.typeobject import pypy.module.cpyext.object -import pypy.module.cpyext.buffer import pypy.module.cpyext.bytesobject import 
pypy.module.cpyext.bytearrayobject import pypy.module.cpyext.tupleobject @@ -50,6 +50,7 @@ import pypy.module.cpyext.longobject import pypy.module.cpyext.listobject import pypy.module.cpyext.sequence +import pypy.module.cpyext.buffer import pypy.module.cpyext.eval import pypy.module.cpyext.import_ import pypy.module.cpyext.mapping diff --git a/pypy/module/cpyext/interp_cpyext.py b/pypy/module/cpyext/interp_cpyext.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/interp_cpyext.py @@ -0,0 +1,4 @@ +from .methodobject import W_PyCFunctionObject + +def is_cpyext_function(space, w_arg): + return space.newbool(isinstance(w_arg, W_PyCFunctionObject)) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -375,6 +375,11 @@ def test_export_function(self): import sys + if '__pypy__' in sys.modules: + from cpyext import is_cpyext_function + else: + import inspect + is_cpyext_function = inspect.isbuiltin body = """ PyObject* foo_pi(PyObject* self, PyObject *args) { @@ -396,6 +401,7 @@ assert 'foo' in sys.modules assert 'return_pi' in dir(module) assert module.return_pi is not None + assert is_cpyext_function(module.return_pi) assert module.return_pi() == 3.14 assert module.return_pi.__module__ == 'foo' diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -77,6 +77,23 @@ assert mod.isSameFunction(mod.getarg_O) raises(SystemError, mod.isSameFunction, 1) + def test_function_as_method(self): + # Unlike user functions, builtins don't become methods + mod = self.import_extension('foo', [ + ('f', 'METH_NOARGS', + ''' + return PyLong_FromLong(42); + '''), + ]) + class A(object): pass + A.f = mod.f + A.g = lambda: 42 + # Unbound method + assert A.f() == A.g() == 42 + # Bound method + assert A().f() == 42 + raises(TypeError, A().g) + def test_check(self): mod = self.import_extension('foo', [ ('check', 'METH_O', diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -362,6 +362,7 @@ @specialize.argtype(1) def _inplace_add(self, other): + resizelist_hint(self._data, len(self._data) + len(other)) for i in range(len(other)): self._data.append(other[i]) diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,10 +15,34 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations +from collections import deque log = AnsiLogger("annrpython") +class ShuffleDict(object): + def __init__(self): + self._d = {} + self.keys = deque() + + def __setitem__(self, k, v): + if k in self._d: + self._d[k] = v + else: + self._d[k] = v + self.keys.append(k) + + def __getitem__(self, k): + return self._d[k] + + def popitem(self): + key = self.keys.popleft() + item = self._d.pop(key) + return (key, item) + + def __nonzero__(self): + return 
bool(self._d) + class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -33,7 +57,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -39,9 +39,7 @@ CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) PLATFORMS = [ - 'maemo', 'host', - 'distutils', 'arm', ] diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -28,7 +28,7 @@ #elif __linux__ return "linux"; #elif __FreeBSD__ - return "freebsd" + return "freebsd"; #else #error "Unknown compiler" #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -29,6 +29,7 @@ static int (*unw_is_signal_frame)(unw_cursor_t *) = NULL; static int (*unw_getcontext)(unw_context_t *) = NULL; #else +#define UNW_LOCAL_ONLY #include #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ 
goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); + return 0; +} int vmp_write_all(const char *buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,25 +2,43 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = get_list_of_files(shared) + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,214 +1,183 @@ import py, os +import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype + at pytest.mark.usefixtures('init') +class RVMProfTest(object): -def test_vmprof_execute_code_1(): + ENTRY_POINT_ARGS = () - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def get_name(self): + return self.name + + @pytest.fixture + def init(self): + self.register() + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) + + def register(self): + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) + return 0 + + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): + print num + return 42 + + def entry_point(self): + code = self.MyCode() + rvmprof.register_code(code, lambda code: 'some code') + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 -def test_vmprof_execute_code_2(): +class RVMProfSamplingTest(RVMProfTest): - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + # the kernel will deliver SIGPROF at max 250 Hz. 
See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 - class A: - pass + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, value, delta_t): + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, self.MyCode.get_name) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) + rvmprof.disable() + os.close(fd) + return res - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 +class TestEnable(RVMProfSamplingTest): -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num - return 42 - - def f(): - code = MyCode() - rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) - assert res == 42 - return 0 - - assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - - -def test_enable(): - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): s = 0 - for i in range(num): + for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def test(self): + from vmprof import read_profile + assert self.entry_point(10**4, 0.1) == 99990000 + assert self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point(10**4, 0.5) == 99990000 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 + +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } + } + return j; + } + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) + + @rvmprof.vmprof_execute_code("xcode1", lambda self, 
code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) else: - num = 10000 - period = 0.9 - rvmprof.enable(fd, period) - res = main(code, num) - #assert res == 499999500000 - rvmprof.disable() - os.close(fd) - return 0 + return self.native_func(100) - def check_profile(filename): - from vmprof import read_profile - - prof = read_profile(filename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count - - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; - } - } - return j; - } - """]) - - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. 
from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -227,16 +196,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -1102,22 +1102,6 @@ assert out.strip() == 'ok' -class TestMaemo(TestStandalone): - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - from rpython.translator.platform.maemo import check_scratchbox - check_scratchbox() - config = get_combined_translation_config(translating=True) - config.translation.platform = 'maemo' - cls.config = config - - def test_profopt(self): - py.test.skip("Unsupported") - - def test_prof_inline(self): - py.test.skip("Unsupported") - - class TestThread(object): gcrootfinder = 'shadowstack' config = None diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -320,24 +320,16 @@ else: host_factory = Cygwin64 else: - # pray - from rpython.translator.platform.distutils_platform import DistutilsPlatform - host_factory = DistutilsPlatform + raise ValueError('unknown sys.platform "%s"', sys.platform) platform = host = host_factory() def pick_platform(new_platform, cc): if new_platform == 'host': return host_factory(cc) - elif new_platform == 'maemo': - from rpython.translator.platform.maemo import Maemo - return Maemo(cc) elif new_platform == 'arm': from rpython.translator.platform.arm import ARM return ARM(cc) - elif new_platform == 'distutils': - from rpython.translator.platform.distutils_platform import DistutilsPlatform - return DistutilsPlatform() else: raise ValueError("platform = %s" % (new_platform,)) diff --git a/rpython/translator/platform/distutils_platform.py b/rpython/translator/platform/distutils_platform.py deleted file mode 100644 --- a/rpython/translator/platform/distutils_platform.py +++ /dev/null @@ -1,157 +0,0 @@ -import py, os, sys - -from rpython.translator.platform import Platform, log, CompilationError -from rpython.translator.tool import stdoutcapture - -def log_spawned_cmd(spawn): - def spawn_and_log(cmd, *args, **kwds): - log.execute(' '.join(cmd)) - return spawn(cmd, *args, **kwds) - return spawn_and_log - -CFLAGS = ['-O3'] - -if os.name != 'nt': - so_ext = 'so' -else: - so_ext = 'dll' - -class DistutilsPlatform(Platform): - """ This is a generic distutils platform. 
I hope it'll go away at some - point soon completely - """ - name = "distutils" - so_ext = so_ext - - def __init__(self, cc=None): - self.cc = cc - if self.name == "distutils": - self.name = sys.platform - - def _ensure_correct_math(self): - if self.name != 'win32': - return # so far - from distutils import sysconfig - gcv = sysconfig.get_config_vars() - opt = gcv.get('OPT') # not always existent - if opt and '/Op' not in opt: - opt += '/Op' - gcv['OPT'] = opt - - def compile(self, cfilenames, eci, outputfilename=None, standalone=True): - self._ensure_correct_math() - self.cfilenames = cfilenames - if standalone: - ext = '' - else: - ext = so_ext - self.standalone = standalone - self.libraries = list(eci.libraries) - self.include_dirs = list(eci.include_dirs) - self.library_dirs = list(eci.library_dirs) - self.compile_extra = list(eci.compile_extra) - self.link_extra = list(eci.link_extra) - self.frameworks = list(eci.frameworks) - if not self.name in ('win32', 'darwin', 'cygwin'): # xxx - if 'm' not in self.libraries: - self.libraries.append('m') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - if 'pthread' not in self.libraries: - self.libraries.append('pthread') - if self.name != 'sunos5': - self.compile_extra += ['-pthread'] - self.link_extra += ['-pthread'] - else: - self.compile_extra += ['-pthreads'] - self.link_extra += ['-lpthread'] - if self.name == 'win32': - self.link_extra += ['/DEBUG'] # generate .pdb file - if self.name == 'darwin': - # support Fink & Darwinports - for s in ('/sw/', '/opt/local/'): - if s + 'include' not in self.include_dirs and \ - os.path.exists(s + 'include'): - self.include_dirs.append(s + 'include') - if s + 'lib' not in self.library_dirs and \ - os.path.exists(s + 'lib'): - self.library_dirs.append(s + 'lib') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - for framework in self.frameworks: - self.link_extra += ['-framework', framework] - - if outputfilename is None: - self.outputfilename = py.path.local(cfilenames[0]).new(ext=ext) - else: - self.outputfilename = py.path.local(outputfilename) - self.eci = eci - import distutils.errors - basename = self.outputfilename.new(ext='') - data = '' - try: - saved_environ = os.environ.copy() - c = stdoutcapture.Capture(mixed_out_err=True) - try: - self._build() - finally: - # workaround for a distutils bugs where some env vars can - # become longer and longer every time it is used - for key, value in saved_environ.items(): - if os.environ.get(key) != value: - os.environ[key] = value - foutput, foutput = c.done() - data = foutput.read() - if data: - fdump = basename.new(ext='errors').open("wb") - fdump.write(data) - fdump.close() - except (distutils.errors.CompileError, - distutils.errors.LinkError): - raise CompilationError('', data) - except: - print >>sys.stderr, data - raise - return self.outputfilename - - def _build(self): - from distutils.ccompiler import new_compiler - from distutils import sysconfig - compiler = new_compiler(force=1) - if self.cc is not None: - for c in '''compiler compiler_so compiler_cxx - linker_exe linker_so'''.split(): - compiler.executables[c][0] = self.cc - if not self.standalone: - sysconfig.customize_compiler(compiler) # XXX - compiler.spawn = log_spawned_cmd(compiler.spawn) - objects = [] - for cfile in self.cfilenames: - cfile = py.path.local(cfile) - compile_extra = self.compile_extra[:] - - old = cfile.dirpath().chdir() - try: - res = compiler.compile([cfile.basename], - include_dirs=self.eci.include_dirs, - extra_preargs=compile_extra) - assert 
len(res) == 1 - cobjfile = py.path.local(res[0]) - assert cobjfile.check() - objects.append(str(cobjfile)) - finally: - old.chdir() - - if self.standalone: - cmd = compiler.link_executable - else: - cmd = compiler.link_shared_object - cmd(objects, str(self.outputfilename), - libraries=self.eci.libraries, - extra_preargs=self.link_extra, - library_dirs=self.eci.library_dirs) - - def _include_dirs_for_libffi(self): - return ['/usr/include/libffi'] - - def _library_dirs_for_libffi(self): - return ['/usr/lib/libffi'] - diff --git a/rpython/translator/platform/maemo.py b/rpython/translator/platform/maemo.py deleted file mode 100644 --- a/rpython/translator/platform/maemo.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Support for Maemo.""" - -import py, os - -from rpython.tool.udir import udir -from rpython.translator.platform import ExecutionResult, log -from rpython.translator.platform.linux import Linux -from rpython.translator.platform.posix import GnuMakefile, _run_subprocess - -def check_scratchbox(): - # in order to work, that file must exist and be executable by us - if not os.access('/scratchbox/login', os.X_OK): - py.test.skip("No scratchbox detected") - -class Maemo(Linux): - name = "maemo" - - available_includedirs = ('/usr/include', '/tmp') - copied_cache = {} - - def _invent_new_name(self, basepath, base): - pth = basepath.join(base) - num = 0 - while pth.check(): - pth = basepath.join('%s_%d' % (base,num)) - num += 1 - return pth.ensure(dir=1) - - def _copy_files_to_new_dir(self, dir_from, pattern='*.[ch]'): - try: - return self.copied_cache[dir_from] - except KeyError: - new_dirpath = self._invent_new_name(udir, 'copied_includes') - files = py.path.local(dir_from).listdir(pattern) - for f in files: - f.copy(new_dirpath) - # XXX - srcdir = py.path.local(dir_from).join('src') - if srcdir.check(dir=1): - target = new_dirpath.join('src').ensure(dir=1) - for f in srcdir.listdir(pattern): - f.copy(target) - # XXX - self.copied_cache[dir_from] = new_dirpath - return new_dirpath - - def _preprocess_include_dirs(self, include_dirs): - """ Tweak includedirs so they'll be available through scratchbox - """ - res_incl_dirs = [] - for incl_dir in include_dirs: - incl_dir = py.path.local(incl_dir) - for available in self.available_includedirs: - if incl_dir.relto(available): - res_incl_dirs.append(str(incl_dir)) - break - else: - # we need to copy files to a place where it's accessible - res_incl_dirs.append(self._copy_files_to_new_dir(incl_dir)) - return res_incl_dirs - - def _execute_c_compiler(self, cc, args, outname): - log.execute('/scratchbox/login ' + cc + ' ' + ' '.join(args)) - args = [cc] + args - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args) - self._handle_error(returncode, stdout, stderr, outname) - - def execute(self, executable, args=[], env=None): - if isinstance(args, str): - args = str(executable) + ' ' + args - log.message('executing /scratchbox/login ' + args) - else: - args = [str(executable)] + args - log.message('executing /scratchbox/login ' + ' '.join(args)) - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args, - env) - return ExecutionResult(returncode, stdout, stderr) - - def _include_dirs_for_libffi(self): - # insanely obscure dir - return ['/usr/include/arm-linux-gnueabi/'] - - def _library_dirs_for_libffi(self): - # on the other hand, library lands in usual place... 
- return [] - - def execute_makefile(self, path_to_makefile, extra_opts=[]): - if isinstance(path_to_makefile, GnuMakefile): - path = path_to_makefile.makefile_dir - else: - path = path_to_makefile - log.execute('make %s in %s' % (" ".join(extra_opts), path)) - returncode, stdout, stderr = _run_subprocess( - '/scratchbox/login', ['make', '-C', str(path)] + extra_opts) - self._handle_error(returncode, stdout, stderr, path.join('make')) diff --git a/rpython/translator/platform/test/test_distutils.py b/rpython/translator/platform/test/test_distutils.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_distutils.py +++ /dev/null @@ -1,17 +0,0 @@ - -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.platform.distutils_platform import DistutilsPlatform -import py - -class TestDistutils(BasicTest): - platform = DistutilsPlatform() - - def test_nice_errors(self): - py.test.skip("Unsupported") - - def test_900_files(self): - py.test.skip('Makefiles not suppoerted') - - def test_precompiled_headers(self): - py.test.skip('Makefiles not suppoerted') - diff --git a/rpython/translator/platform/test/test_maemo.py b/rpython/translator/platform/test/test_maemo.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_maemo.py +++ /dev/null @@ -1,37 +0,0 @@ - -""" File containing maemo platform tests -""" - -import py -from rpython.tool.udir import udir -from rpython.translator.platform.maemo import Maemo, check_scratchbox -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.tool.cbuild import ExternalCompilationInfo - -class TestMaemo(BasicTest): - platform = Maemo() - strict_on_stderr = False - - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - check_scratchbox() - - def test_includes_outside_scratchbox(self): - cfile = udir.join('test_includes_outside_scratchbox.c') - cfile.write(''' - #include - #include "test.h" - int main() - { - printf("%d\\n", XXX_STUFF); - return 0; - } - ''') - includedir = py.path.local(__file__).dirpath().join('include') - eci = ExternalCompilationInfo(include_dirs=(includedir,)) - executable = self.platform.compile([cfile], eci) - res = self.platform.execute(executable) - self.check_res(res) - - def test_environment_inheritance(self): - py.test.skip("FIXME") diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -147,10 +147,13 @@ def test_is_host_build(): + from platform import machine from rpython.translator import platform assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('maemo', None) - assert platform.host != platform.platform - assert not platform.is_host_build() + # do we support non-host builds? 
+ if machine().startswith('arm'): + platform.set_platform('arm', None) + assert platform.host != platform.platform + assert not platform.is_host_build() diff --git a/rpython/translator/platform/test/test_posix.py b/rpython/translator/platform/test/test_posix.py --- a/rpython/translator/platform/test/test_posix.py +++ b/rpython/translator/platform/test/test_posix.py @@ -64,10 +64,3 @@ assert 'INCLUDEDIRS = %s/foo/baz/include' % include_prefix in Makefile assert 'LIBDIRS = %s/foo/baz/lib' % lib_prefix in Makefile -class TestMaemo(TestMakefile): - strict_on_stderr = False - - def setup_class(cls): - from rpython.translator.platform.maemo import check_scratchbox, Maemo - check_scratchbox() - cls.platform = Maemo() diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Mon Nov 13 16:10:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:10:06 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Return True from inspect.isbuiltin() for functions implemented in C Message-ID: <5a0a0a2e.4eb6df0a.d7dd1.2f91@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93007:06fb68203b17 Date: 2017-11-13 21:09 +0000 http://bitbucket.org/pypy/pypy/changeset/06fb68203b17/ Log: Return True from inspect.isbuiltin() for functions implemented in C diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -49,6 +49,10 @@ import builtins from operator import attrgetter from collections import namedtuple, OrderedDict +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # Create constants for the compiler flags in Include/code.h # We try to get them from dis to avoid duplication @@ -262,7 +266,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" From pypy.commits at gmail.com Mon Nov 13 16:33:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:33:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip (parts of) tests that require _pickle Message-ID: <5a0a0f97.26acdf0a.6d561.7b3a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93008:fc81c46b1987 Date: 2017-11-13 21:32 +0000 http://bitbucket.org/pypy/pypy/changeset/fc81c46b1987/ Log: Skip (parts of) tests that require _pickle diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -32,6 +32,8 @@ from test.support import check_impl_detail from test.test_import import _ready_to_import +if check_impl_detail(): + import _pickle # Functions tested in this suite: @@ -755,12 +757,12 @@ @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullargspec_builtin_methods(self): - import _pickle - 
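
For reference, a minimal app-level sketch of the gc.get_stats() helper introduced
in the memory-accounting changeset r93011 above. The attribute names and the
repr() method are taken from the pypy/module/gc/app_referents.py diff; the call
itself is only available on a PyPy build that includes that branch, so treat this
as an illustration rather than documented API:

    import gc

    stats = gc.get_stats()
    # repr() is an ordinary method on GcStats, not __repr__
    print(stats.repr())
    # individual fields come back pre-formatted as "...kB"/"...MB" strings
    print(stats.total_gc_memory)      # memory currently used by the GC
    print(stats.jit_backend_used)     # raw assembler used by the JIT
    print(stats.memory_used_sum)      # GC used + assembler + memory pressure

Returning pre-formatted strings rather than raw byte counts keeps the app-level
wrapper trivial; the raw integer values are still reachable through the
interp-level gc._get_stats() object that GcStats wraps.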
self.assertFullArgSpecEquals(_pickle.Pickler.dump, - args_e=['self', 'obj'], formatted='(self, obj)') - - self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, - args_e=['self', 'obj'], formatted='(self, obj)') + if check_impl_detail(): + self.assertFullArgSpecEquals(_pickle.Pickler.dump, + args_e=['self', 'obj'], formatted='(self, obj)') + + self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, + args_e=['self', 'obj'], formatted='(self, obj)') self.assertFullArgSpecEquals( os.stat, @@ -1961,7 +1963,6 @@ "Signature information for builtins requires docstrings") def test_signature_on_builtins(self): import _testcapi - import _pickle def test_unbound_method(o): """Use this to test unbound methods (things that should have a self)""" @@ -1995,9 +1996,10 @@ # normal method # (PyMethodDescr_Type, "method_descriptor") - test_unbound_method(_pickle.Pickler.dump) - d = _pickle.Pickler(io.StringIO()) - test_callable(d.dump) + if check_impl_detail(): + test_unbound_method(_pickle.Pickler.dump) + d = _pickle.Pickler(io.StringIO()) + test_callable(d.dump) # static method test_callable(str.maketrans) @@ -2627,10 +2629,10 @@ with self.assertRaisesRegex(ValueError, "callable.*is not supported"): self.assertEqual(inspect.signature(D), None) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtin_class(self): - import _pickle self.assertEqual(str(inspect.signature(_pickle.Pickler)), '(file, protocol=None, fix_imports=True)') @@ -2876,10 +2878,10 @@ foo_sig = MySignature.from_callable(foo) self.assertTrue(isinstance(foo_sig, MySignature)) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_from_callable_builtin_obj(self): - import _pickle class MySignature(inspect.Signature): pass sig = MySignature.from_callable(_pickle.Pickler) self.assertTrue(isinstance(sig, MySignature)) From pypy.commits at gmail.com Mon Nov 13 17:45:56 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 13 Nov 2017 14:45:56 -0800 (PST) Subject: [pypy-commit] pypy default: Merged in tpruzina/pypy (pull request #581) Message-ID: <5a0a20a4.51bbdf0a.dbb51.7cf7@mx.google.com> Author: Armin Rigo Branch: Changeset: r93010:27b914ed1ea1 Date: 2017-11-13 22:45 +0000 http://bitbucket.org/pypy/pypy/changeset/27b914ed1ea1/ Log: Merged in tpruzina/pypy (pull request #581) fix detect_pax behavior on linux if procfs is mounted with hidepid>=1 Approved-by: Vadim A. Misbakh-Soloviov diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. 
""" if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True From pypy.commits at gmail.com Mon Nov 13 17:46:06 2017 From: pypy.commits at gmail.com (=?utf-8?b?VG9tw6HFoSBQcnXFvmluYSA8bm9yZXBseUBidWlsZGJvdC5weXB5Lm9y?= =?utf-8?q?g=3E?=) Date: Mon, 13 Nov 2017 14:46:06 -0800 (PST) Subject: [pypy-commit] pypy default: fix detect_pax behavior on linux where procfs is mounted with hidepid>=1 Message-ID: <5a0a20ae.83c4df0a.708b4.1e10@mx.google.com> Author: Tomáš Pružina Branch: Changeset: r93009:368d2eef1229 Date: 2017-11-05 05:46 +0100 http://bitbucket.org/pypy/pypy/changeset/368d2eef1229/ Log: fix detect_pax behavior on linux where procfs is mounted with hidepid>=1 PID1 (init) isn't observable on systems with procfs mounted with hidepid=1,2 unless build runs under root (for example on Gentoo where package manager compiles under user 'portage'). This can be fixed by replacing /proc/1/status with /proc/self/status (which is visible to the build script). diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. """ if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True From pypy.commits at gmail.com Tue Nov 14 04:20:23 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 14 Nov 2017 01:20:23 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: add tracking of memory Message-ID: <5a0ab557.49c71c0a.fc848.8678@mx.google.com> Author: fijal Branch: memory-accounting Changeset: r93011:5e198814c5f6 Date: 2017-11-14 10:19 +0100 http://bitbucket.org/pypy/pypy/changeset/5e198814c5f6/ Log: add tracking of memory diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -19,6 +19,7 @@ space.config.translation.gctransformer == "framework"): self.appleveldefs.update({ 'dump_rpy_heap': 'app_referents.dump_rpy_heap', + 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ 'get_rpy_roots': 'referents.get_rpy_roots', @@ -28,7 +29,7 @@ 'get_objects': 'referents.get_objects', 'get_referents': 'referents.get_referents', 'get_referrers': 'referents.get_referrers', - 'get_stats': 'referents.get_stats', + '_get_stats': 'referents.get_stats', '_dump_rpy_heap': 'referents._dump_rpy_heap', 'get_typeids_z': 'referents.get_typeids_z', 'get_typeids_list': 'referents.get_typeids_list', diff --git a/pypy/module/gc/app_referents.py b/pypy/module/gc/app_referents.py --- a/pypy/module/gc/app_referents.py +++ b/pypy/module/gc/app_referents.py @@ -48,3 +48,42 @@ file.flush() fd = file.fileno() gc._dump_rpy_heap(fd) + +class GcStats(object): + def __init__(self, s): + self._s = s + for item in ('total_gc_memory', 'jit_backend_used', 'total_memory_pressure', + 'total_allocated_memory', 'jit_backend_allocated'): + setattr(self, item, self._format(getattr(self._s, item))) + self.memory_used_sum = self._format(self._s.total_gc_memory + self._s.total_memory_pressure + + self._s.jit_backend_used) + self.memory_allocated_sum = self._format(self._s.total_allocated_memory + 
self._s.total_memory_pressure + + self._s.jit_backend_allocated) + + def _format(self, v): + if v < 1000000: + # bit unlikely ;-) + return "%.1fkB" % (v / 1024.) + return "%.1fMB" % (v / 1024. / 1024.) + + def repr(self): + return """Total memory consumed: +GC used: %s +raw assembler used: %s +memory pressure: %s +----------------------------- +Total: %s + +Total memory allocated: +GC allocated: %s +raw assembler allocated: %s +memory pressure: %s +----------------------------- +Total: %s +""" % (self.total_gc_memory, self.jit_backend_used, self.total_memory_pressure, + self.memory_used_sum, + self.total_allocated_memory, self.jit_backend_allocated, self.total_memory_pressure, + self.memory_allocated_sum) + +def get_stats(): + return GcStats(gc._get_stats()) diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -1,7 +1,7 @@ -from rpython.rlib import rgc +from rpython.rlib import rgc, jit_hooks from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.typedef import TypeDef, interp_attrproperty -from pypy.interpreter.gateway import unwrap_spec +from pypy.interpreter.gateway import unwrap_spec, interp2app from pypy.interpreter.error import oefmt, wrap_oserror from rpython.rlib.objectmodel import we_are_translated @@ -175,12 +175,21 @@ def __init__(self): self.total_memory_pressure = rgc.get_stats(rgc.TOTAL_MEMORY_PRESSURE) self.total_gc_memory = rgc.get_stats(rgc.TOTAL_MEMORY) + self.total_allocated_memory = rgc.get_stats(rgc.TOTAL_ALLOCATED_MEMORY) + self.jit_backend_allocated = jit_hooks.stats_asmmemmgr_allocated(None) + self.jit_backend_used = jit_hooks.stats_asmmemmgr_used(None) W_GcStats.typedef = TypeDef("GcStats", total_memory_pressure=interp_attrproperty("total_memory_pressure", cls=W_GcStats, wrapfn="newint"), total_gc_memory=interp_attrproperty("total_gc_memory", - cls=W_GcStats, wrapfn="newint") + cls=W_GcStats, wrapfn="newint"), + total_allocated_memory=interp_attrproperty("total_allocated_memory", + cls=W_GcStats, wrapfn="newint"), + jit_backend_allocated=interp_attrproperty("jit_backend_allocated", + cls=W_GcStats, wrapfn="newint"), + jit_backend_used=interp_attrproperty("jit_backend_used", + cls=W_GcStats, wrapfn="newint"), ) def get_stats(space): diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1184,6 +1184,11 @@ """ return self.ac.total_memory_used + self.rawmalloced_total_size + def get_total_memory_alloced(self): + """ Return the total memory allocated + """ + return self.ac.total_memory_alloced + self.rawmalloced_total_size + def threshold_reached(self, extra=0): return (self.next_major_collection_threshold - float(self.get_total_memory_used())) < float(extra) @@ -2925,7 +2930,7 @@ if stats_no == rgc.TOTAL_MEMORY: return intmask(self.get_total_memory_used() + self.nursery_size) elif stats_no == rgc.TOTAL_ALLOCATED_MEMORY: - return 0 + return intmask(self.get_total_memory_alloced() + self.nursery_size) elif stats_no == rgc.TOTAL_MEMORY_PRESSURE: return inspector.count_memory_pressure(self) return 0 diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -294,6 +294,7 @@ # 'arena_base' points to the start of malloced memory; it might not # be a page-aligned address arena_base = llarena.arena_malloc(self.arena_size, False) + self.total_memory_alloced += 
self.arena_size if not arena_base: out_of_memory("out of memory: couldn't allocate the next arena") arena_end = arena_base + self.arena_size @@ -398,6 +399,7 @@ # The whole arena is empty. Free it. llarena.arena_reset(arena.base, self.arena_size, 4) llarena.arena_free(arena.base) + self.total_memory_alloced -= self.arena_size lltype.free(arena, flavor='raw', track_allocation=False) # else: From pypy.commits at gmail.com Tue Nov 14 05:19:03 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 14 Nov 2017 02:19:03 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Don't return a copy on long // 1 Message-ID: <5a0ac317.26acdf0a.6d561.ad34@mx.google.com> Author: stian Branch: math-improvements Changeset: r93012:9838b9ca2938 Date: 2017-11-14 11:18 +0100 http://bitbucket.org/pypy/pypy/changeset/9838b9ca2938/ Log: Don't return a copy on long // 1 diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -788,8 +788,8 @@ if self.sign == 1 and other.numdigits() == 1 and other.sign == 1: digit = other.digit(0) if digit == 1: - return rbigint(self._digits[:self.numdigits()], 1, self.numdigits()) - elif digit and digit & (digit - 1) == 0: + return self + elif digit & (digit - 1) == 0: return self.rqshift(ptwotable[digit]) div, mod = _divrem(self, other) From pypy.commits at gmail.com Tue Nov 14 11:03:46 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:03:46 -0800 (PST) Subject: [pypy-commit] pypy py3.5: backout b95f1240ad90: this was fixed in CPython 3.* Message-ID: <5a0b13e2.8dbbdf0a.7319.a330@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93013:714cdd09fc99 Date: 2017-11-14 16:03 +0000 http://bitbucket.org/pypy/pypy/changeset/714cdd09fc99/ Log: backout b95f1240ad90: this was fixed in CPython 3.* diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -87,17 +87,13 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if it.skip(pygram.tokens.STRING): + it.skip_newlines() - docstring_possible = True - while True: - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if docstring_possible and it.skip(pygram.tokens.STRING): - it.skip_newlines() - docstring_possible = False - if not (it.skip_name("from") and + while (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): - break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -193,13 +193,3 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == 0 - -def test_future_doc_future(): - # for some reason people do this :-[ - s = ''' -from __future__ import generators -"Docstring" -from __future__ import division - ''' - f = run(s, (4, 24)) - assert f == 0 From pypy.commits at gmail.com Tue Nov 14 11:31:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:31:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Allow inspect._signature_from_callable() to work on builtins (by handling them like user functions) Message-ID: <5a0b1a78.17711c0a.8005c.71ef@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93014:5d5dfbb116aa Date: 2017-11-14 16:31 +0000 http://bitbucket.org/pypy/pypy/changeset/5d5dfbb116aa/ Log: Allow inspect._signature_from_callable() to work on builtins (by handling them like user functions) diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -1828,7 +1828,7 @@ kwdefaults = getattr(obj, '__kwdefaults__', _void) # ... 
and not None here annotations = getattr(obj, '__annotations__', None) - return (isinstance(code, types.CodeType) and + return (isinstance(code, (types.CodeType, _builtin_code_type)) and isinstance(name, str) and (defaults is None or isinstance(defaults, tuple)) and (kwdefaults is None or isinstance(kwdefaults, dict)) and From pypy.commits at gmail.com Tue Nov 14 11:41:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:41:17 -0800 (PST) Subject: [pypy-commit] pypy py3.5: update test for PyPy Message-ID: <5a0b1cad.499edf0a.3360b.c8c9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93015:b05acdc71ad8 Date: 2017-11-14 16:40 +0000 http://bitbucket.org/pypy/pypy/changeset/b05acdc71ad8/ Log: update test for PyPy diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -816,7 +816,9 @@ attrs = attrs_wo_objs(A) - self.assertIn(('__new__', 'method', object), attrs, 'missing __new__') + # changed in PyPy + self.assertIn(('__new__', 'static method', object), attrs, 'missing __new__') + self.assertIn(('__init__', 'method', object), attrs, 'missing __init__') self.assertIn(('s', 'static method', A), attrs, 'missing static method') From pypy.commits at gmail.com Tue Nov 14 12:38:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 09:38:36 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Correctly compute .co_kwonlyargcount on BuiltinCode objects Message-ID: <5a0b2a1c.c7c61c0a.61060.6e8c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93016:d129c0d2de48 Date: 2017-11-14 17:38 +0000 http://bitbucket.org/pypy/pypy/changeset/d129c0d2de48/ Log: Correctly compute .co_kwonlyargcount on BuiltinCode objects diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -18,7 +18,7 @@ class TestBuiltinCode: - def test_signature(self): + def test_signature(self, space): def c(space, w_x, w_y, hello_w): pass code = gateway.BuiltinCode(c, unwrap_spec=[gateway.ObjSpace, @@ -53,6 +53,8 @@ code = gateway.BuiltinCode(f, unwrap_spec=[gateway.ObjSpace, "kwonly", W_Root]) assert code.signature() == Signature([], kwonlyargnames=['x']) + assert space.int_w(space.getattr( + code, space.newtext('co_kwonlyargcount'))) == 1 def test_call(self): diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -538,6 +538,9 @@ def fget_co_argcount(space, code): # unwrapping through unwrap_spec return space.newint(code.signature().num_argnames()) +def fget_co_kwonlyargcount(space, code): # unwrapping through unwrap_spec + return space.newint(code.signature().num_kwonlyargnames()) + def fget_zero(space, code): return space.newint(0) @@ -597,7 +600,7 @@ co_name = interp_attrproperty('co_name', cls=BuiltinCode, wrapfn="newtext_or_none"), co_varnames = GetSetProperty(fget_co_varnames, cls=BuiltinCode), co_argcount = GetSetProperty(fget_co_argcount, cls=BuiltinCode), - co_kwonlyargcount = GetSetProperty(fget_zero, cls=BuiltinCode), + co_kwonlyargcount = GetSetProperty(fget_co_kwonlyargcount, cls=BuiltinCode), co_flags = GetSetProperty(fget_co_flags, cls=BuiltinCode), co_consts = GetSetProperty(fget_co_consts, cls=BuiltinCode), ) From pypy.commits at gmail.com Tue Nov 14 13:54:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 10:54:28 -0800 (PST) Subject: 
[pypy-commit] pypy py3.5: ignoring test_finddoc seems acceptable Message-ID: <5a0b3be4.08b51c0a.c1328.9f1c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93017:f0a8cba78c24 Date: 2017-11-14 18:54 +0000 http://bitbucket.org/pypy/pypy/changeset/f0a8cba78c24/ Log: ignoring test_finddoc seems acceptable diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -364,6 +364,7 @@ self.assertEqual(inspect.getdoc(mod.FesteringGob.contradiction), 'The automatic gainsaying.') + @cpython_only # XXX: _finddoc() is broken on PyPy, but getdoc() seems OK @unittest.skipIf(MISSING_C_DOCSTRINGS, "test requires docstrings") def test_finddoc(self): finddoc = inspect._finddoc From pypy.commits at gmail.com Tue Nov 14 14:01:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 11:01:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: tweak test Message-ID: <5a0b3da1.05d31c0a.2da46.9bac@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93018:1465066182bd Date: 2017-11-14 19:01 +0000 http://bitbucket.org/pypy/pypy/changeset/1465066182bd/ Log: tweak test diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -2023,7 +2023,7 @@ # This doesn't work now. # (We don't have a valid signature for "type" in 3.4) - with self.assertRaisesRegex(ValueError, "no signature found"): + with self.assertRaisesRegex(ValueError, "signature"): class ThisWorksNow: __call__ = type test_callable(ThisWorksNow()) From pypy.commits at gmail.com Tue Nov 14 14:51:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 11:51:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: PyUnicode_FromObject only works on instances of str Message-ID: <5a0b495b.4fcb1c0a.8a596.3c40@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93019:a3c86e99e3e4 Date: 2017-11-14 19:51 +0000 http://bitbucket.org/pypy/pypy/changeset/a3c86e99e3e4/ Log: PyUnicode_FromObject only works on instances of str diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -319,6 +319,20 @@ assert module.unsafe_len(u'aАbБcСdД') == 8 assert module.unsafe_len(u"café\U0001F4A9") == 5 + def test_FromObject(self): + module = self.import_extension('foo', [ + ("from_object", "METH_O", + """ + return PyUnicode_FromObject(args); + """)]) + class my_str(str): pass + assert module.from_object('abc') == 'abc' + res = module.from_object(my_str('abc')) + assert type(res) is str + assert res == 'abc' + raises(TypeError, module.from_object, b'abc') + raises(TypeError, module.from_object, 42) + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): @@ -500,6 +514,12 @@ assert ret == Py_CLEANUP_SUPPORTED assert space.isinstance_w(from_ref(space, result[0]), space.w_bytes) assert PyUnicode_FSDecoder(space, None, result) == 1 + # Input is invalid + w_input = space.newint(42) + with lltype.scoped_alloc(PyObjectP.TO, 1) as result: + with pytest.raises(OperationError): + PyUnicode_FSConverter(space, w_input, result) + def test_IS(self, space): for char in [0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x1c, 0x1d, 0x1e, 0x1f, diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ 
b/pypy/module/cpyext/unicodeobject.py @@ -505,12 +505,19 @@ @cpython_api([PyObject], PyObject) def PyUnicode_FromObject(space, w_obj): - """Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict") which is used - throughout the interpreter whenever coercion to Unicode is needed.""" + """Copy an instance of a Unicode subtype to a new true Unicode object if + necessary. If obj is already a true Unicode object (not a subtype), return + the reference with incremented refcount. + + Objects other than Unicode or its subtypes will cause a TypeError. + """ if space.is_w(space.type(w_obj), space.w_unicode): return w_obj + elif space.isinstance_w(w_obj, space.w_unicode): + return space.call_function(space.w_unicode, w_obj) else: - return space.call_function(space.w_unicode, w_obj) + raise oefmt(space.w_TypeError, + "Can't convert '%T' object to str implicitly", w_obj) @cpython_api([PyObject, CONST_STRING, CONST_STRING], PyObject) def PyUnicode_FromEncodedObject(space, w_obj, encoding, errors): From pypy.commits at gmail.com Tue Nov 14 15:13:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 12:13:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: skip or tweak tests Message-ID: <5a0b4e57.8faedf0a.e4890.3f32@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93020:6f9bbe108de5 Date: 2017-11-14 20:12 +0000 http://bitbucket.org/pypy/pypy/changeset/6f9bbe108de5/ Log: skip or tweak tests diff --git a/lib-python/3/test/test_capi.py b/lib-python/3/test/test_capi.py --- a/lib-python/3/test/test_capi.py +++ b/lib-python/3/test/test_capi.py @@ -53,6 +53,8 @@ self.assertEqual(testfunction.attribute, "test") self.assertRaises(AttributeError, setattr, inst.testfunction, "attribute", "test") + @unittest.skipIf(support.check_impl_detail(pypy=True), + "doesn't crash on PyPy") @unittest.skipUnless(threading, 'Threading required for this test.') def test_no_FatalError_infinite_loop(self): with support.SuppressCrashReport(): @@ -205,9 +207,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_null_without_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_null_without_error.* ' - 'returned NULL without setting an error') + 'Function returned a NULL result without setting an exception') def test_return_result_with_error(self): # Issue #23571: A function must not return a result with an error set @@ -237,9 +239,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_result_with_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_result_with_error.* ' - 'returned a result with an error set') + 'An exception was set, but function returned a value') def test_buildvalue_N(self): _testcapi.test_buildvalue_N() @@ -327,6 +329,8 @@ self.pendingcalls_wait(l, n) + at unittest.skipIf(support.check_impl_detail(pypy=True), + "subinterpreters not implemented on PyPy") class SubinterpreterTest(unittest.TestCase): def test_subinterps(self): From pypy.commits at gmail.com Tue Nov 14 16:00:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:00:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Unskip _testcapi tests that should pass, skip those that cannot Message-ID: <5a0b5965.d18d1c0a.7250a.a931@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93021:66f524561285 Date: 2017-11-14 20:59 +0000 http://bitbucket.org/pypy/pypy/changeset/66f524561285/ Log: Unskip _testcapi tests that should pass, skip those that cannot diff --git a/lib-python/3/test/test_capi.py 
b/lib-python/3/test/test_capi.py --- a/lib-python/3/test/test_capi.py +++ b/lib-python/3/test/test_capi.py @@ -29,8 +29,9 @@ skips = [] if support.check_impl_detail(pypy=True): skips += [ - 'test_widechar', - ] + 'test_lazy_hash_inheritance', + 'test_capsule', + ] def testfunction(self): """some doc""" diff --git a/lib_pypy/_testcapimodule.c b/lib_pypy/_testcapimodule.c --- a/lib_pypy/_testcapimodule.c +++ b/lib_pypy/_testcapimodule.c @@ -2818,8 +2818,6 @@ return PyMemoryView_FromBuffer(&info); } -#ifndef PYPY_VERSION - static PyObject * test_from_contiguous(PyObject* self, PyObject *noargs) { @@ -2869,7 +2867,6 @@ Py_RETURN_NONE; } -#endif /* PYPY_VERSION */ #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) extern PyTypeObject _PyBytesIOBuffer_Type; @@ -3907,9 +3904,7 @@ {"test_string_to_double", (PyCFunction)test_string_to_double, METH_NOARGS}, {"test_unicode_compare_with_ascii", (PyCFunction)test_unicode_compare_with_ascii, METH_NOARGS}, {"test_capsule", (PyCFunction)test_capsule, METH_NOARGS}, -#ifndef PYPY_VERSION {"test_from_contiguous", (PyCFunction)test_from_contiguous, METH_NOARGS}, -#endif #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) {"test_pep3118_obsolete_write_locks", (PyCFunction)test_pep3118_obsolete_write_locks, METH_NOARGS}, #endif From pypy.commits at gmail.com Tue Nov 14 16:14:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:14:47 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Check for NULL in PyMemoryView_FromBuffer Message-ID: <5a0b5cc7.4eb6df0a.d7dd1.558a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93022:57dc41aeb601 Date: 2017-11-14 21:14 +0000 http://bitbucket.org/pypy/pypy/changeset/57dc41aeb601/ Log: Check for NULL in PyMemoryView_FromBuffer diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -201,6 +201,10 @@ The memoryview object then owns the buffer represented by view, which means you shouldn't try to call PyBuffer_Release() yourself: it will be done on deallocation of the memoryview object.""" + if not view.c_buf: + raise oefmt(space.w_ValueError, + "PyMemoryView_FromBuffer(): info->buf must not be NULL") + # XXX this should allocate a PyMemoryViewObject and # copy view into obj.c_view, without creating a new view.c_obj typedescr = get_typedescr(W_MemoryView.typedef) diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -255,3 +255,13 @@ """)]) mv = module.new() assert mv.tobytes() == b'hell' + + def test_FromBuffer_NULL(self): + module = self.import_extension('foo', [ + ('new', 'METH_NOARGS', """ + Py_buffer info; + if (PyBuffer_FillInfo(&info, NULL, NULL, 1, 1, PyBUF_FULL_RO) < 0) + return NULL; + return PyMemoryView_FromBuffer(&info); + """)]) + raises(ValueError, module.new) From pypy.commits at gmail.com Tue Nov 14 16:43:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:43:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Define SIZEOF_WCHAR_T in pyconfig.h and copy CPython logic for the related Py_UNICODE_XXX defines Message-ID: <5a0b637d.cc091c0a.7ba4e.4202@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93023:69a07b055bdd Date: 2017-11-14 21:43 +0000 http://bitbucket.org/pypy/pypy/changeset/69a07b055bdd/ Log: 
Define SIZEOF_WCHAR_T in pyconfig.h and copy CPython logic for the related Py_UNICODE_XXX defines diff --git a/pypy/module/cpyext/include/pyconfig.h b/pypy/module/cpyext/include/pyconfig.h --- a/pypy/module/cpyext/include/pyconfig.h +++ b/pypy/module/cpyext/include/pyconfig.h @@ -21,10 +21,9 @@ /* PyPy supposes Py_UNICODE == wchar_t */ #define HAVE_USABLE_WCHAR_T 1 #ifndef _WIN32 -#define Py_UNICODE_SIZE 4 -#define Py_UNICODE_WIDE +#define SIZEOF_WCHAR_T 4 #else -#define Py_UNICODE_SIZE 2 +#define SIZEOF_WCHAR_T 2 #endif #ifndef _WIN32 diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -1,6 +1,25 @@ #ifndef Py_UNICODEOBJECT_H #define Py_UNICODEOBJECT_H +#ifndef SIZEOF_WCHAR_T +#error Must define SIZEOF_WCHAR_T +#endif + +#define Py_UNICODE_SIZE SIZEOF_WCHAR_T + +/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. + Otherwise, Unicode strings are stored as UCS-2 (with limited support + for UTF-16) */ + +#if Py_UNICODE_SIZE >= 4 +#define Py_UNICODE_WIDE +#endif + +/* Set these flags if the platform has "wchar.h" and the + wchar_t type is a 16-bit unsigned type */ +/* #define HAVE_WCHAR_H */ +/* #define HAVE_USABLE_WCHAR_T */ + #ifdef __cplusplus extern "C" { #endif diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -333,6 +333,29 @@ raises(TypeError, module.from_object, b'abc') raises(TypeError, module.from_object, 42) + def test_widechar(self): + module = self.import_extension('foo', [ + ("make_wide", "METH_NOARGS", + """ + #if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; + size_t wtextlen = 1; + const wchar_t invalid[1] = {(wchar_t)0x110000u}; + #else + const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; + size_t wtextlen = 2; + #endif + return PyUnicode_FromWideChar(wtext, wtextlen); + """), + ("make_utf8", "METH_NOARGS", + """ + return PyUnicode_FromString("\\xf4\\x8a\\xaf\\x8d"); + """)]) + wide = module.make_wide() + utf8 = module.make_utf8() + print(repr(wide), repr(utf8)) + assert wide == utf8 + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): From pypy.commits at gmail.com Tue Nov 14 18:09:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 15:09:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix Message-ID: <5a0b77ad.ecb2df0a.b8660.a78e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93024:bf4ed8da1af1 Date: 2017-11-14 23:09 +0000 http://bitbucket.org/pypy/pypy/changeset/bf4ed8da1af1/ Log: fix diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -1,3 +1,4 @@ +from pypy.interpreter.error import oefmt from pypy.module.cpyext.api import ( cpython_api, CANNOT_FAIL, Py_MAX_FMT, Py_MAX_NDIMS, build_type_checkers, Py_ssize_tP, cts, parse_dir, bootstrap_function, Py_bufferP, slot_function) From pypy.commits at gmail.com Tue Nov 14 18:11:08 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 15:11:08 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Update getargs.c to match CPython 3.5.4 Message-ID: <5a0b780c.cfd51c0a.3b8eb.1012@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93025:00710c6f69a6 Date: 2017-11-14 
23:10 +0000 http://bitbucket.org/pypy/pypy/changeset/00710c6f69a6/ Log: Update getargs.c to match CPython 3.5.4 diff --git a/pypy/module/cpyext/src/getargs.c b/pypy/module/cpyext/src/getargs.c --- a/pypy/module/cpyext/src/getargs.c +++ b/pypy/module/cpyext/src/getargs.c @@ -35,14 +35,16 @@ } freelistentry_t; typedef struct { + freelistentry_t *entries; int first_available; - freelistentry_t *entries; + int entries_malloced; } freelist_t; +#define STATIC_FREELIST_ENTRIES 8 /* Forward */ static int vgetargs1(PyObject *, const char *, va_list *, int); -static void seterror(int, const char *, int *, const char *, const char *); +static void seterror(Py_ssize_t, const char *, int *, const char *, const char *); static char *convertitem(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static char *converttuple(PyObject *, const char **, va_list *, int, @@ -176,7 +178,8 @@ freelist->entries[index].item); } } - PyMem_FREE(freelist->entries); + if (freelist->entries_malloced) + PyMem_FREE(freelist->entries); return retval; } @@ -195,8 +198,13 @@ const char *formatsave = format; Py_ssize_t i, len; char *msg; - freelist_t freelist = {0, NULL}; int compat = flags & FLAG_COMPAT; + freelistentry_t static_entries[STATIC_FREELIST_ENTRIES]; + freelist_t freelist; + + freelist.entries = static_entries; + freelist.first_available = 0; + freelist.entries_malloced = 0; assert(compat || (args != (PyObject*)NULL)); flags = flags & ~FLAG_COMPAT; @@ -229,15 +237,15 @@ message = format; endfmt = 1; break; + case '|': + if (level == 0) + min = max; + break; default: if (level == 0) { - if (c == 'O') - max++; - else if (isalpha(Py_CHARMASK(c))) { + if (isalpha(Py_CHARMASK(c))) if (c != 'e') /* skip encoded */ max++; - } else if (c == '|') - min = max; } break; } @@ -251,30 +259,31 @@ format = formatsave; - freelist.entries = PyMem_NEW(freelistentry_t, max); - if (freelist.entries == NULL) { - PyErr_NoMemory(); - return 0; + if (max > STATIC_FREELIST_ENTRIES) { + freelist.entries = PyMem_NEW(freelistentry_t, max); + if (freelist.entries == NULL) { + PyErr_NoMemory(); + return 0; + } + freelist.entries_malloced = 1; } if (compat) { if (max == 0) { if (args == NULL) return 1; - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.200s%s takes no arguments", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()"); - PyErr_SetString(PyExc_TypeError, msgbuf); + PyErr_Format(PyExc_TypeError, + "%.200s%s takes no arguments", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()"); return cleanreturn(0, &freelist); } else if (min == 1 && max == 1) { if (args == NULL) { - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.200s%s takes at least one argument", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()"); - PyErr_SetString(PyExc_TypeError, msgbuf); + PyErr_Format(PyExc_TypeError, + "%.200s%s takes at least one argument", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()"); return cleanreturn(0, &freelist); } msg = convertitem(args, &format, p_va, flags, levels, @@ -300,20 +309,18 @@ len = PyTuple_GET_SIZE(args); if (len < min || max < len) { - if (message == NULL) { - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.150s%s takes %s %d argument%s " - "(%ld given)", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()", - min==max ? "exactly" - : len < min ? "at least" : "at most", - len < min ? min : max, - (len < min ? min : max) == 1 ? 
"" : "s", - Py_SAFE_DOWNCAST(len, Py_ssize_t, long)); - message = msgbuf; - } - PyErr_SetString(PyExc_TypeError, message); + if (message == NULL) + PyErr_Format(PyExc_TypeError, + "%.150s%s takes %s %d argument%s (%ld given)", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()", + min==max ? "exactly" + : len < min ? "at least" : "at most", + len < min ? min : max, + (len < min ? min : max) == 1 ? "" : "s", + Py_SAFE_DOWNCAST(len, Py_ssize_t, long)); + else + PyErr_SetString(PyExc_TypeError, message); return cleanreturn(0, &freelist); } @@ -324,7 +331,7 @@ flags, levels, msgbuf, sizeof(msgbuf), &freelist); if (msg) { - seterror(i+1, msg, levels, fname, msg); + seterror(i+1, msg, levels, fname, message); return cleanreturn(0, &freelist); } } @@ -343,7 +350,7 @@ static void -seterror(int iarg, const char *msg, int *levels, const char *fname, +seterror(Py_ssize_t iarg, const char *msg, int *levels, const char *fname, const char *message) { char buf[512]; @@ -359,10 +366,10 @@ } if (iarg != 0) { PyOS_snprintf(p, sizeof(buf) - (p - buf), - "argument %d", iarg); + "argument %" PY_FORMAT_SIZE_T "d", iarg); i = 0; p += strlen(p); - while (levels[i] > 0 && i < 32 && (int)(p-buf) < 220) { + while (i < 32 && levels[i] > 0 && (int)(p-buf) < 220) { PyOS_snprintf(p, sizeof(buf) - (p - buf), ", item %d", levels[i]-1); p += strlen(p); @@ -407,6 +414,7 @@ int n = 0; const char *format = *p_format; int i; + Py_ssize_t len; for (;;) { int c = *format++; @@ -436,12 +444,20 @@ return msgbuf; } - if ((i = PySequence_Size(arg)) != n) { + len = PySequence_Size(arg); + if (len != n) { levels[0] = 0; - PyOS_snprintf(msgbuf, bufsize, - toplevel ? "expected %d arguments, not %d" : - "must be sequence of length %d, not %d", - n, i); + if (toplevel) { + PyOS_snprintf(msgbuf, bufsize, + "expected %d arguments, not %" PY_FORMAT_SIZE_T "d", + n, len); + } + else { + PyOS_snprintf(msgbuf, bufsize, + "must be sequence of length %d, " + "not %" PY_FORMAT_SIZE_T "d", + n, len); + } return msgbuf; } @@ -457,7 +473,6 @@ strncpy(msgbuf, "is not retrievable", bufsize); return msgbuf; } - //PyPy_Borrow(arg, item); msg = convertitem(item, &format, p_va, flags, levels+1, msgbuf, bufsize, freelist); /* PySequence_GetItem calls tp->sq_item, which INCREFs */ @@ -502,9 +517,6 @@ -#define UNICODE_DEFAULT_ENCODING(arg) \ - _PyUnicode_AsDefaultEncodedString(arg, NULL) - /* Format an error message generated by convertsimple(). */ static char * @@ -512,9 +524,15 @@ { assert(expected != NULL); assert(arg != NULL); - PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s", expected, - arg == Py_None ? "None" : arg->ob_type->tp_name); + if (expected[0] == '(') { + PyOS_snprintf(msgbuf, bufsize, + "%.100s", expected); + } + else { + PyOS_snprintf(msgbuf, bufsize, + "must be %.50s, not %.50s", expected, + arg == Py_None ? "None" : arg->ob_type->tp_name); + } return msgbuf; } @@ -560,14 +578,14 @@ "size does not fit in an int"); \ return converterr("", arg, msgbuf, bufsize); \ } \ - *q=s; \ + *q = (int)s; \ } #define BUFFER_LEN ((flags & FLAG_SIZE_T) ? 
*q2:*q) #define RETURN_ERR_OCCURRED return msgbuf const char *format = *p_format; char c = *format++; - PyObject *uarg; + char *sarg; switch (c) { @@ -718,7 +736,7 @@ if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr("integer", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); *p = ival; break; } @@ -743,7 +761,7 @@ if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr("integer", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); *p = ival; break; } @@ -784,6 +802,8 @@ char *p = va_arg(*p_va, char *); if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) *p = PyBytes_AS_STRING(arg)[0]; + else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) + *p = PyByteArray_AS_STRING(arg)[0]; else return converterr("a byte string of length 1", arg, msgbuf, bufsize); break; @@ -791,18 +811,40 @@ case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); - if (PyUnicode_Check(arg) && - PyUnicode_GET_SIZE(arg) == 1) - *p = PyUnicode_AS_UNICODE(arg)[0]; + int kind; + void *data; + + if (!PyUnicode_Check(arg)) + return converterr("a unicode character", arg, msgbuf, bufsize); + + if (PyUnicode_READY(arg)) + RETURN_ERR_OCCURRED; + + if (PyUnicode_GET_LENGTH(arg) != 1) + return converterr("a unicode character", arg, msgbuf, bufsize); + + kind = PyUnicode_KIND(arg); + data = PyUnicode_DATA(arg); + *p = PyUnicode_READ(kind, data, 0); + break; + } + + case 'p': {/* boolean *p*redicate */ + int *p = va_arg(*p_va, int *); + int val = PyObject_IsTrue(arg); + if (val > 0) + *p = 1; + else if (val == 0) + *p = 0; else - return converterr("a unicode character", arg, msgbuf, bufsize); + RETURN_ERR_OCCURRED; break; } /* XXX WAAAAH! 's', 'y', 'z', 'u', 'Z', 'e', 'w' codes all need to be cleaned up! 
*/ - case 'y': {/* any buffer-like object, but not PyUnicode */ + case 'y': {/* any bytes-like object */ void **p = (void **)va_arg(*p_va, char **); char *buf; Py_ssize_t count; @@ -825,16 +867,16 @@ STORE_SIZE(count); format++; } else { - if (strlen(*p) != count) - return converterr( - "bytes without null bytes", - arg, msgbuf, bufsize); + if (strlen(*p) != (size_t)count) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); + RETURN_ERR_OCCURRED; + } } break; } - case 's': /* text string */ - case 'z': /* text string or None */ + case 's': /* text string or bytes-like object */ + case 'z': /* text string, bytes-like object or None */ { if (*format == '*') { /* "s*" or "z*" */ @@ -843,15 +885,14 @@ if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + Py_ssize_t len; + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - PyBuffer_FillInfo(p, arg, - PyBytes_AS_STRING(uarg), PyBytes_GET_SIZE(uarg), - 1, 0); + PyBuffer_FillInfo(p, arg, sarg, len, 1, 0); } - else { /* any buffer-like object */ + else { /* any bytes-like object */ char *buf; if (getbuffer(arg, p, &buf) < 0) return converterr(buf, arg, msgbuf, bufsize); @@ -862,7 +903,7 @@ arg, msgbuf, bufsize); } format++; - } else if (*format == '#') { /* any buffer-like object */ + } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ void **p = (void **)va_arg(*p_va, char **); FETCH_SIZE; @@ -872,14 +913,15 @@ STORE_SIZE(0); } else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + Py_ssize_t len; + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - *p = PyBytes_AS_STRING(uarg); - STORE_SIZE(PyBytes_GET_SIZE(uarg)); + *p = sarg; + STORE_SIZE(len); } - else { /* any buffer-like object */ + else { /* read-only bytes-like object */ /* XXX Really? */ char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); @@ -891,26 +933,25 @@ } else { /* "s" or "z" */ char **p = va_arg(*p_va, char **); - uarg = NULL; + Py_ssize_t len; + sarg = NULL; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - *p = PyBytes_AS_STRING(uarg); + if (strlen(sarg) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + RETURN_ERR_OCCURRED; + } + *p = sarg; } else return converterr(c == 'z' ? "str or None" : "str", arg, msgbuf, bufsize); - if (*p != NULL && uarg != NULL && - (Py_ssize_t) strlen(*p) != PyBytes_GET_SIZE(uarg)) - return converterr( - c == 'z' ? 
"str without null bytes or None" - : "str without null bytes", - arg, msgbuf, bufsize); } break; } @@ -918,9 +959,10 @@ case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { - if (*format == '#') { /* any buffer-like object */ - /* "s#" or "Z#" */ - Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + + if (*format == '#') { + /* "u#" or "Z#" */ FETCH_SIZE; if (c == 'Z' && arg == Py_None) { @@ -928,24 +970,29 @@ STORE_SIZE(0); } else if (PyUnicode_Check(arg)) { - *p = PyUnicode_AS_UNICODE(arg); - STORE_SIZE(PyUnicode_GET_SIZE(arg)); + Py_ssize_t len; + *p = PyUnicode_AsUnicodeAndSize(arg, &len); + if (*p == NULL) + RETURN_ERR_OCCURRED; + STORE_SIZE(len); } else - return converterr("str or None", arg, msgbuf, bufsize); + return converterr(c == 'Z' ? "str or None" : "str", + arg, msgbuf, bufsize); format++; } else { - /* "s" or "Z" */ - Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); - + /* "u" or "Z" */ if (c == 'Z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { - *p = PyUnicode_AS_UNICODE(arg); - if (Py_UNICODE_strlen(*p) != PyUnicode_GET_SIZE(arg)) - return converterr( - "str without null character or None", - arg, msgbuf, bufsize); + Py_ssize_t len; + *p = PyUnicode_AsUnicodeAndSize(arg, &len); + if (*p == NULL) + RETURN_ERR_OCCURRED; + if (Py_UNICODE_strlen(*p) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + RETURN_ERR_OCCURRED; + } } else return converterr(c == 'Z' ? "str or None" : "str", arg, msgbuf, bufsize); @@ -1071,9 +1118,11 @@ } else { if (size + 1 > BUFFER_LEN) { Py_DECREF(s); - return converterr( - "(buffer overflow)", - arg, msgbuf, bufsize); + PyErr_Format(PyExc_TypeError, + "encoded string too long " + "(%zd, maximum length %zd)", + (Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1)); + RETURN_ERR_OCCURRED; } } memcpy(*buffer, ptr, size+1); @@ -1095,7 +1144,7 @@ if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - "encoded string without NULL bytes", + "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1135,8 +1184,11 @@ case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); - if (PyUnicode_Check(arg)) + if (PyUnicode_Check(arg)) { + if (PyUnicode_READY(arg) == -1) + RETURN_ERR_OCCURRED; *p = arg; + } else return converterr("str", arg, msgbuf, bufsize); break; @@ -1182,7 +1234,7 @@ if (*format != '*') return converterr( - "invalid use of 'w' format character", + "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; @@ -1190,7 +1242,8 @@ supports it directly. 
*/ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr("read-write buffer", arg, msgbuf, bufsize); + return converterr("read-write bytes-like object", + arg, msgbuf, bufsize); } if (!PyBuffer_IsContiguous((Py_buffer*)p, 'C')) { PyBuffer_Release((Py_buffer*)p); @@ -1205,7 +1258,7 @@ } default: - return converterr("impossible", arg, msgbuf, bufsize); + return converterr("(impossible)", arg, msgbuf, bufsize); } @@ -1228,7 +1281,7 @@ *errmsg = NULL; *p = NULL; if (pb != NULL && pb->bf_releasebuffer != NULL) { - *errmsg = "read-only pinned buffer"; + *errmsg = "read-only bytes-like object"; return -1; } @@ -1244,7 +1297,7 @@ getbuffer(PyObject *arg, Py_buffer *view, char **errmsg) { if (PyObject_GetBuffer(arg, view, PyBUF_SIMPLE) != 0) { - *errmsg = "bytes or buffer"; + *errmsg = "bytes-like object"; return -1; } if (!PyBuffer_IsContiguous(view, 'C')) { @@ -1383,9 +1436,16 @@ int levels[32]; const char *fname, *msg, *custom_msg, *keyword; int min = INT_MAX; - int i, len, nargs, nkeywords; + int max = INT_MAX; + int i, len; + Py_ssize_t nargs, nkeywords; PyObject *current_arg; - freelist_t freelist = {0, NULL}; + freelistentry_t static_entries[STATIC_FREELIST_ENTRIES]; + freelist_t freelist; + + freelist.entries = static_entries; + freelist.first_available = 0; + freelist.entries_malloced = 0; assert(args != NULL && PyTuple_Check(args)); assert(keywords == NULL || PyDict_Check(keywords)); @@ -1409,17 +1469,20 @@ for (len=0; kwlist[len]; len++) continue; - freelist.entries = PyMem_NEW(freelistentry_t, len); - if (freelist.entries == NULL) { - PyErr_NoMemory(); - return 0; + if (len > STATIC_FREELIST_ENTRIES) { + freelist.entries = PyMem_NEW(freelistentry_t, len); + if (freelist.entries == NULL) { + PyErr_NoMemory(); + return 0; + } + freelist.entries_malloced = 1; } nargs = PyTuple_GET_SIZE(args); nkeywords = (keywords == NULL) ? 0 : PyDict_Size(keywords); if (nargs + nkeywords > len) { - PyErr_Format(PyExc_TypeError, "%s%s takes at most %d " - "argument%s (%d given)", + PyErr_Format(PyExc_TypeError, + "%s%s takes at most %d argument%s (%zd given)", (fname == NULL) ? "function" : fname, (fname == NULL) ? "" : "()", len, @@ -1432,8 +1495,39 @@ for (i = 0; i < len; i++) { keyword = kwlist[i]; if (*format == '|') { + if (min != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string (| specified twice)"); + return cleanreturn(0, &freelist); + } + min = i; format++; + + if (max != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string ($ before |)"); + return cleanreturn(0, &freelist); + } + } + if (*format == '$') { + if (max != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string ($ specified twice)"); + return cleanreturn(0, &freelist); + } + + max = i; + format++; + + if (max < nargs) { + PyErr_Format(PyExc_TypeError, + "Function takes %s %d positional arguments" + " (%d given)", + (min != INT_MAX) ? 
"at most" : "exactly", + max, nargs); + return cleanreturn(0, &freelist); + } } if (IS_END_OF_FORMAT(*format)) { PyErr_Format(PyExc_RuntimeError, @@ -1494,7 +1588,7 @@ } } - if (!IS_END_OF_FORMAT(*format) && *format != '|') { + if (!IS_END_OF_FORMAT(*format) && (*format != '|') && (*format != '$')) { PyErr_Format(PyExc_RuntimeError, "more argument specifiers than keyword list entries " "(remaining format:'%s')", format); @@ -1507,12 +1601,13 @@ Py_ssize_t pos = 0; while (PyDict_Next(keywords, &pos, &key, &value)) { int match = 0; - char *ks; + char* ks; if (!PyUnicode_Check(key)) { PyErr_SetString(PyExc_TypeError, "keywords must be strings"); return cleanreturn(0, &freelist); } + /* check that _PyUnicode_AsString() result is not NULL */ ks = _PyUnicode_AsString(key); if (ks != NULL) { @@ -1545,8 +1640,10 @@ switch (c) { - /* simple codes - * The individual types (second arg of va_arg) are irrelevant */ + /* + * codes that take a single data pointer as an argument + * (the type of the pointer is irrelevant) + */ case 'b': /* byte -- very short int */ case 'B': /* byte as bitfield */ @@ -1560,22 +1657,21 @@ case 'L': /* PY_LONG_LONG */ case 'K': /* PY_LONG_LONG sized bitfield */ #endif + case 'n': /* Py_ssize_t */ case 'f': /* float */ case 'd': /* double */ case 'D': /* complex double */ case 'c': /* char */ case 'C': /* unicode char */ + case 'p': /* boolean predicate */ + case 'S': /* string object */ + case 'Y': /* string object */ + case 'U': /* unicode string object */ { (void) va_arg(*p_va, void *); break; } - case 'n': /* Py_ssize_t */ - { - (void) va_arg(*p_va, Py_ssize_t *); - break; - } - /* string codes */ case 'e': /* string with encoding */ @@ -1608,16 +1704,6 @@ break; } - /* object codes */ - - case 'S': /* string object */ - case 'Y': /* string object */ - case 'U': /* unicode string object */ - { - (void) va_arg(*p_va, PyObject **); - break; - } - case 'O': /* object */ { if (*format == '!') { @@ -1731,7 +1817,7 @@ /* For type constructors that don't take keyword args * - * Sets a TypeError and returns 0 if the kwds dict is + * Sets a TypeError and returns 0 if the args/kwargs is * not empty, returns 1 otherwise */ int @@ -1750,6 +1836,25 @@ funcname); return 0; } + + +int +_PyArg_NoPositional(const char *funcname, PyObject *args) +{ + if (args == NULL) + return 1; + if (!PyTuple_CheckExact(args)) { + PyErr_BadInternalCall(); + return 0; + } + if (PyTuple_GET_SIZE(args) == 0) + return 1; + + PyErr_Format(PyExc_TypeError, "%s does not take positional arguments", + funcname); + return 0; +} + #ifdef __cplusplus }; #endif From pypy.commits at gmail.com Tue Nov 14 20:14:11 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:11 -0800 (PST) Subject: [pypy-commit] pypy vmprof-enable-kwargs: a branch where to implement the same _vmprof logic as in vmprof's enable-kwargs branch Message-ID: <5a0b94e3.08b51c0a.c1328.e8cf@mx.google.com> Author: Antonio Cuni Branch: vmprof-enable-kwargs Changeset: r93026:ef1507afe550 Date: 2017-11-10 16:42 +0100 http://bitbucket.org/pypy/pypy/changeset/ef1507afe550/ Log: a branch where to implement the same _vmprof logic as in vmprof's enable-kwargs branch From pypy.commits at gmail.com Tue Nov 14 20:14:13 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:13 -0800 (PST) Subject: [pypy-commit] pypy default: improve the test by also checking the full call-stack at various points. 
Probably it does not test anything more than before, but it is a good aid when you read it Message-ID: <5a0b94e5.52c6df0a.2c65f.64fb@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93027:b7758cca88a3 Date: 2017-11-13 18:40 +0100 http://bitbucket.org/pypy/pypy/changeset/b7758cca88a3/ Log: improve the test by also checking the full call-stack at various points. Probably it does not test anything more than before, but it is a good aid when you read it diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -340,16 +340,41 @@ import sys from _continuation import continulet # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +383,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() From pypy.commits at gmail.com Tue Nov 14 20:14:20 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:20 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: WIP: refactor test_xxx into a proper failing tests, with a decent name and real asserts Message-ID: <5a0b94ec.09a0df0a.5b3a0.1732@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93030:3e4c6ca55d1d Date: 2017-11-15 01:54 +0100 http://bitbucket.org/pypy/pypy/changeset/3e4c6ca55d1d/ Log: WIP: refactor test_xxx into a proper failing tests, with a decent name and real asserts diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,28 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle (shouldn't happen) + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) def test_new_empty(self): from _continuation import continulet @@ -336,70 +358,31 
@@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] - def test_xxx(self): + def test_f_back_no_cycles(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() - while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - print res - return res - def bar(c): f = sys._getframe(0) - print 'bar 1' + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] c.switch(f) - print 'bar 2' + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] def foo(c): bar(c) - - print + # c = continulet(foo) - print 'test 1' + assert stack() == ['test_f_back_no_cycles'] f = c.switch() - print 'test 2' - xxx = c.switch() - print 'xxx', xxx - #stack() - #stack(f) + assert stack() == ['test_f_back_no_cycles'] + assert stack(f) == ['bar', 'foo'] + c.switch() def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() - while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - #print res - return res - def bar(c): assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) @@ -436,6 +419,7 @@ f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main2', 'test_f_back'] assert stack(f1_bar) == ['bar', 'foo', '...'] # main() From pypy.commits at gmail.com Tue Nov 14 20:14:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:16 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: temporary checkin with some debugging stuff + a new logic to avoid building cycles of frames Message-ID: <5a0b94e8.c380df0a.2925d.3942@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93028:f7a1a6eb6908 Date: 2017-11-14 11:53 +0100 http://bitbucket.org/pypy/pypy/changeset/f7a1a6eb6908/ Log: temporary checkin with some debugging stuff + a new logic to avoid building cycles of frames diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -41,13 +41,16 @@ bottomframe.locals_cells_stack_w[3] = w_kwds bottomframe.last_exception = get_cleared_operation_error(space) self.bottomframe = bottomframe + self.topframe = sthread.ec.topframeref # XXX? 
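The tests in these changesets keep re-deriving the caller's call chain by hand. A minimal, self-contained sketch of the frame-walking pattern they rely on, using only sys._getframe() and f_back; the helper and demo function names below are illustrative, not part of any changeset:

import sys

def call_stack(f=None):
    # Collect co_name along the f_back chain of the given frame
    # (default: the caller's frame). Guard against f_back cycles:
    # plain calls never produce one, but paused continulets did
    # before the continulet-no-frame-loop branch.
    if f is None:
        f = sys._getframe(1)
    names = []
    seen = set()
    while f is not None:
        if f in seen:
            names.append('...')
            break
        seen.add(f)
        names.append(f.f_code.co_name)
        f = f.f_back
    return names

def inner():
    return call_stack()

def outer():
    return inner()

print(outer())    # ['inner', 'outer', '<module>'] when run as a script

The real tests wrap the same idea in the w_stack fixture shown above, so every assertion can compare against an explicit list of frame names.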
# global_state.origin = self self.sthread = sthread + pstack(self, 'descr_init') h = sthread.new(new_stacklet_callback) post_switch(sthread, h) def switch(self, w_to): + #import pdb;pdb.set_trace() sthread = self.sthread to = self.space.interp_w(W_Continulet, w_to, can_be_None=True) if to is not None and to.sthread is None: @@ -76,9 +79,11 @@ global_state.origin = self if to is None: # simple switch: going to self.h + #print 'simple switch' global_state.destination = self else: # double switch: the final destination is to.h + #print 'double switch' global_state.destination = to # h = sthread.switch(global_state.destination.h) @@ -217,6 +222,23 @@ global_state.clear() +def pstack(cont, message=''): + return + if message: + print message + if isinstance(cont, jit.DirectJitVRef): + f = cont() + else: + f = cont.bottomframe + i = 0 + while f: + print ' ', f.pycode.co_name + f = f.f_backref() + i += 1 + if i == 10: + break + print + def new_stacklet_callback(h, arg): self = global_state.origin self.h = h @@ -225,6 +247,7 @@ frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: + #import pdb;pdb.xpm() global_state.propagate_exception = e else: global_state.w_value = w_result @@ -236,15 +259,32 @@ def post_switch(sthread, h): origin = global_state.origin self = global_state.destination + #import pdb;pdb.set_trace() global_state.origin = None global_state.destination = None self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - sthread.ec.topframeref = self.bottomframe.f_backref - self.bottomframe.f_backref = origin.bottomframe.f_backref - origin.bottomframe.f_backref = current + print '==== SWITCH ====' + pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') + pstack(self, 'self') + + # ORGINAL + ## sthread.ec.topframeref = self.bottomframe.f_backref + ## self.bottomframe.f_backref = origin.bottomframe.f_backref + ## origin.bottomframe.f_backref = current + + # antocuni + sthread.ec.topframeref = self.topframe + self.topframe = origin.topframe + origin.topframe = current + # + print 'swap' + pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') + pstack(self, 'self') + print '==== END SWITCH ====' + print return get_result() def get_result(): diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -336,6 +336,47 @@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + def test_xxx(self): + import sys + from _continuation import continulet + # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + print res + return res + + def bar(c): + f = sys._getframe(0) + print 'bar 1' + c.switch(f) + print 'bar 2' + def foo(c): + bar(c) + + print + c = continulet(foo) + print 'test 1' + f = c.switch() + print 'test 2' + xxx = c.switch() + print 'xxx', xxx + #stack() + #stack(f) + def test_f_back(self): import sys from _continuation import continulet From pypy.commits at gmail.com Tue Nov 14 20:14:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:22 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: add a passing test to check that we stick the continulet stack at the right position of the f_back chain 
Message-ID: <5a0b94ee.52c6df0a.2c65f.6509@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93031:82b54bb1e271 Date: 2017-11-15 02:01 +0100 http://bitbucket.org/pypy/pypy/changeset/82b54bb1e271/ Log: add a passing test to check that we stick the continulet stack at the right position of the f_back chain diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -378,6 +378,34 @@ assert stack(f) == ['bar', 'foo'] c.switch() + def test_f_back_proper_chaining(self): + import sys + from _continuation import continulet + stack = self.stack + # + seen = [] + def bar(c): + seen.append(2) + assert stack() == ['bar', 'foo', 'test_f_back_proper_chaining'] + c.switch() + seen.append(5) + assert stack() == ['bar', 'foo', 'main', + 'test_f_back_proper_chaining'] + def foo(c): + bar(c) + def main(c): + seen.append(4) + assert stack() == ['main', 'test_f_back_proper_chaining'] + c.switch() + seen.append(6) + + c = continulet(foo) + seen.append(1) + c.switch() + seen.append(3) + f = main(c) + assert seen == [1, 2, 3, 4, 5, 6] + def test_f_back(self): import sys from _continuation import continulet From pypy.commits at gmail.com Tue Nov 14 20:14:18 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:18 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: WIP: add two temporary flags which allow to select: 1) the old switch logic vs the new one and 2) whether to print or not debug infos Message-ID: <5a0b94ea.26acdf0a.6d561.e079@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93029:ba1ff85734b9 Date: 2017-11-15 01:43 +0100 http://bitbucket.org/pypy/pypy/changeset/ba1ff85734b9/ Log: WIP: add two temporary flags which allow to select: 1) the old switch logic vs the new one and 2) whether to print or not debug infos diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -41,7 +41,7 @@ bottomframe.locals_cells_stack_w[3] = w_kwds bottomframe.last_exception = get_cleared_operation_error(space) self.bottomframe = bottomframe - self.topframe = sthread.ec.topframeref # XXX? + self.topframeref = sthread.ec.topframeref # global_state.origin = self self.sthread = sthread @@ -79,11 +79,9 @@ global_state.origin = self if to is None: # simple switch: going to self.h - #print 'simple switch' global_state.destination = self else: # double switch: the final destination is to.h - #print 'double switch' global_state.destination = to # h = sthread.switch(global_state.destination.h) @@ -221,24 +219,6 @@ global_state = GlobalState() global_state.clear() - -def pstack(cont, message=''): - return - if message: - print message - if isinstance(cont, jit.DirectJitVRef): - f = cont() - else: - f = cont.bottomframe - i = 0 - while f: - print ' ', f.pycode.co_name - f = f.f_backref() - i += 1 - if i == 10: - break - print - def new_stacklet_callback(h, arg): self = global_state.origin self.h = h @@ -256,35 +236,66 @@ global_state.destination = self return self.h +DEBUG = False +ORIGINAL = False + +def log(x=''): + if DEBUG: + print x + +def pstack(cont, message=''): + """ + NOTE: I don't know exactly why, but sometimes if you pstack() a sthread or + a frame, then later you get an InvalidVirtualRef exception. 
So, in + general, this is a useful debugging tool but don't expect your tests to + pass if you call it. Put DEBUG=False to disable. + """ + if not DEBUG: + return + if message: + print message + if isinstance(cont, jit.DirectJitVRef): + f = cont() + else: + f = cont.bottomframe + i = 0 + while f: + print ' ', f.pycode.co_name + f = f.f_backref() + i += 1 + if i == 10: + break + print + def post_switch(sthread, h): origin = global_state.origin self = global_state.destination - #import pdb;pdb.set_trace() global_state.origin = None global_state.destination = None self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - print '==== SWITCH ====' + lo + g('==== SWITCH ====') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') pstack(self, 'self') - # ORGINAL - ## sthread.ec.topframeref = self.bottomframe.f_backref - ## self.bottomframe.f_backref = origin.bottomframe.f_backref - ## origin.bottomframe.f_backref = current - - # antocuni - sthread.ec.topframeref = self.topframe - self.topframe = origin.topframe - origin.topframe = current - + if ORIGINAL: + sthread.ec.topframeref = self.bottomframe.f_backref + self.bottomframe.f_backref = origin.bottomframe.f_backref + origin.bottomframe.f_backref = current + else: + # antocuni + sthread.ec.topframeref = self.topframeref + self.topframeref = origin.topframeref + origin.topframeref = current # - print 'swap' + log('swap') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') - pstack(self, 'self') - print '==== END SWITCH ====' - print + pstack(self + , 'self') + log('==== END SWITCH ====') + log() return get_result() def get_result(): @@ -315,7 +326,8 @@ if cont.sthread is None: continue # ignore non-initialized continulets else: - raise geterror(space, "inter-thread support is missing") + raise geterror(space + , "inter-thread support is missing") elif sthread.is_empty_handle(cont.h): raise geterror(space, "got an already-finished continulet") contlist.append(cont) From pypy.commits at gmail.com Tue Nov 14 20:14:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:24 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: introduce the concept of running/paused continulet, depending on bottomframe.f_backref; fix the post_switch() logic to build the f_back chain correctly; finally fix test_f_back to check that we do NOT build cycles of frames Message-ID: <5a0b94f0.ddb1df0a.34cbb.596d@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93032:d5212118820d Date: 2017-11-15 02:11 +0100 http://bitbucket.org/pypy/pypy/changeset/d5212118820d/ Log: introduce the concept of running/paused continulet, depending on bottomframe.f_backref; fix the post_switch() logic to build the f_back chain correctly; finally fix test_f_back to check that we do NOT build cycles of frames diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -17,6 +17,8 @@ # states: # - not init'ed: self.sthread == None # - normal: self.sthread != None, not is_empty_handle(self.h) + # * running: self.bottomframe.f_backref is not vref_None + # * paused: self.bottomframe.f_backref is vref_None # - finished: self.sthread != None, is_empty_handle(self.h) def check_sthread(self): @@ -275,8 +277,7 @@ self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - lo - g('==== SWITCH ====') + log('==== SWITCH ====') 
pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') pstack(self, 'self') @@ -285,15 +286,20 @@ self.bottomframe.f_backref = origin.bottomframe.f_backref origin.bottomframe.f_backref = current else: - # antocuni sthread.ec.topframeref = self.topframeref self.topframeref = origin.topframeref + self.bottomframe.f_backref = origin.bottomframe.f_backref origin.topframeref = current + if origin.bottomframe.f_backref is jit.vref_None: + # paused ==> running: build the f_back link + origin.bottomframe.f_backref = current + else: + # running ==> paused: break the f_back link + origin.bottomframe.f_backref = jit.vref_None # log('swap') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') - pstack(self - , 'self') + pstack(self, 'self') log('==== END SWITCH ====') log() return get_result() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -406,27 +406,27 @@ f = main(c) assert seen == [1, 2, 3, 4, 5, 6] - def test_f_back(self): + def test_f_back_complex(self): import sys from _continuation import continulet stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back'] + assert stack() == ['bar', 'foo', 'test_f_back_complex'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] c.switch(sys._getframe(1).f_back) # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # - assert stack() == ['test_f_back'] + assert stack() == ['test_f_back_complex'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -439,16 +439,16 @@ def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main2', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main2', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # main() main2() From pypy.commits at gmail.com Tue Nov 14 20:14:27 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:27 -0800 (PST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5a0b94f3.42da1c0a.3b8a9.9a94@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93033:821e59360f37 Date: 2017-11-15 02:13 +0100 http://bitbucket.org/pypy/pypy/changeset/821e59360f37/ Log: merge heads diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -340,16 +340,41 @@ import sys from _continuation import continulet # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = 
sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +383,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() From pypy.commits at gmail.com Wed Nov 15 10:07:19 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:19 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: make sure that vmprof don't sample the stack in the middle of stacklet switching, else it reads nonsense and BOOM Message-ID: <5a0c5827.0e97df0a.8f39d.3c79@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93034:3e4e9ff62be1 Date: 2017-11-15 12:59 +0100 http://bitbucket.org/pypy/pypy/changeset/3e4e9ff62be1/ Log: make sure that vmprof don't sample the stack in the middle of stacklet switching, else it reads nonsense and BOOM diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling(space=None) # XXX x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling(space=None) # XXX if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(MyCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def 
entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Wed Nov 15 10:07:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:22 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: fix typo Message-ID: <5a0c582a.f2a9df0a.850f5.445e@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93035:1b1bdd6c7f39 Date: 2017-11-15 12:05 +0000 http://bitbucket.org/pypy/pypy/changeset/1b1bdd6c7f39/ Log: fix typo diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -280,7 +280,7 @@ # rvmprof.c does not compile correctly class FakeVMProfCode(object): pass -rvmprof.register_code_object_class(MyCode, lambda code: 'name') +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) def fake_vmprof_main(code, num): return 42 From pypy.commits at gmail.com Wed Nov 15 10:07:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:24 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: add a test which is failing (i.e., segfaulting) on default, and that it seems to be fixed on this branch Message-ID: <5a0c582c.01ed1c0a.a3420.6656@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93036:52a6650ba479 Date: 2017-11-15 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/52a6650ba479/ Log: add a test which is failing (i.e., segfaulting) on default, and that it seems to be fixed on this branch diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): @@ -124,7 +152,8 @@ try: import thread except ImportError: - py.test.skip("no threads") + py.test.ski + p("no threads") ts = [ThreadTest(thread.allocate_lock()) for i in range(5)] for t in ts: thread.start_new_thread(t.run, ()) From pypy.commits at gmail.com Wed Nov 15 10:07:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:26 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: remove the space param from rvmprof.{start, stop}_sampling: it is not used and also it does not make sense to have it inside rlib Message-ID: <5a0c582e.099fdf0a.c3df7.e37c@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93037:46ee55287ed4 Date: 2017-11-15 16:03 +0100 http://bitbucket.org/pypy/pypy/changeset/46ee55287ed4/ Log: remove the space param from rvmprof.{start,stop}_sampling: it is not used and also it does not make sense to have it inside rlib diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -41,13 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling(space=None) # XXX + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling(space=None) # XXX + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() From pypy.commits at gmail.com Wed Nov 15 10:07:27 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:27 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: close merged branch Message-ID: <5a0c582f.7a86df0a.a4821.7ca4@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93038:2c8aa32187c0 Date: 2017-11-15 16:04 +0100 http://bitbucket.org/pypy/pypy/changeset/2c8aa32187c0/ Log: close merged branch From pypy.commits at gmail.com Wed Nov 15 10:07:30 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:30 -0800 (PST) Subject: [pypy-commit] pypy default: merge the fix-vmprof-stacklet-switch: make sure that vmprof does not segfault in presence of continuation.switch (and thus with greenlets, eventlet, etc.) 
Message-ID: <5a0c5832.45c1df0a.c8bc7.467c@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93039:927cc69f4d52 Date: 2017-11-15 16:06 +0100 http://bitbucket.org/pypy/pypy/changeset/927cc69f4d52/ Log: merge the fix-vmprof-stacklet-switch: make sure that vmprof does not segfault in presence of continuation.switch (and thus with greenlets, eventlet, etc.) diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. + """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib 
import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Wed Nov 15 10:17:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 07:17:22 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Merged in thisch/pypy/py3.5 (pull request #584) Message-ID: <5a0c5a82.c6a2df0a.13ea3.d96c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93042:dfff7758834e Date: 2017-11-15 15:16 +0000 http://bitbucket.org/pypy/pypy/changeset/dfff7758834e/ Log: Merged in thisch/pypy/py3.5 (pull request #584) Change return type of os.times to posix.times_result diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -18,6 +18,7 @@ 'error': 'app_posix.error', 'stat_result': 'app_posix.stat_result', 'statvfs_result': 'app_posix.statvfs_result', + 'times_result': 'app_posix.times_result', 'uname_result': 'app_posix.uname_result', 'urandom': 'app_posix.urandom', 'terminal_size': 'app_posix.terminal_size', diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py --- a/pypy/module/posix/app_posix.py +++ b/pypy/module/posix/app_posix.py @@ -122,6 +122,19 @@ else: _validate_fd = validate_fd + +class times_result(metaclass=structseqtype): + + name = "posix.times_result" + __module__ = "posix" + + user = structseqfield(0, "user time") + system = structseqfield(1, "system time") + children_user = structseqfield(2, "user time of children") + children_system = structseqfield(3, "system time of children") + elapsed = structseqfield(4, "elapsed time since an arbitray point in the past") + + if osname == 'posix': def wait(): """ wait() -> (pid, status) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -684,11 +684,17 @@ except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) else: - return space.newtuple([space.newfloat(times[0]), - space.newfloat(times[1]), - space.newfloat(times[2]), - space.newfloat(times[3]), - space.newfloat(times[4])]) + w_keywords = space.newdict() + w_tuple = space.newtuple([space.newfloat(times[0]), + space.newfloat(times[1]), + space.newfloat(times[2]), + space.newfloat(times[3]), + space.newfloat(times[4])]) + + w_times_result = space.getattr(space.getbuiltinmodule(os.name), + space.newtext('times_result')) + return space.call_function(w_times_result, w_tuple, w_keywords) + @unwrap_spec(command='fsencode') def system(space, command): diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -378,16 +378,21 @@ def test_times(self): """ - posix.times() should return a five-tuple giving float-representations - (seconds, effectively) of the four fields from the underlying struct - 
tms and the return value. + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from + the underlying struct tms and the return value. """ result = self.posix.times() - assert isinstance(result, tuple) + assert isinstance(self.posix.times(), self.posix.times_result) + assert isinstance(self.posix.times(), tuple) assert len(result) == 5 for value in result: assert isinstance(value, float) - + assert isinstance(result.user, float) + assert isinstance(result.system, float) + assert isinstance(result.children_user, float) + assert isinstance(result.children_system, float) + assert isinstance(result.elapsed, float) def test_strerror(self): assert isinstance(self.posix.strerror(0), str) From pypy.commits at gmail.com Wed Nov 15 10:17:32 2017 From: pypy.commits at gmail.com (thisch) Date: Wed, 15 Nov 2017 07:17:32 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Change return type of os.times to posix.times_result Message-ID: <5a0c5a8c.44841c0a.152ca.eb1b@mx.google.com> Author: Thomas Hisch Branch: py3.5 Changeset: r93040:852c26ea2a1c Date: 2017-11-12 21:49 +0100 http://bitbucket.org/pypy/pypy/changeset/852c26ea2a1c/ Log: Change return type of os.times to posix.times_result The return type was changed in CPython3.3. Related: #2375 diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -18,6 +18,7 @@ 'error': 'app_posix.error', 'stat_result': 'app_posix.stat_result', 'statvfs_result': 'app_posix.statvfs_result', + 'times_result': 'app_posix.times_result', 'uname_result': 'app_posix.uname_result', 'urandom': 'app_posix.urandom', 'terminal_size': 'app_posix.terminal_size', diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py --- a/pypy/module/posix/app_posix.py +++ b/pypy/module/posix/app_posix.py @@ -122,6 +122,19 @@ else: _validate_fd = validate_fd + +class times_result(metaclass=structseqtype): + + name = "posix.times_result" + __module__ = "posix" + + user = structseqfield(0, "user time") + system = structseqfield(1, "system time") + children_user = structseqfield(2, "user time of children") + children_system = structseqfield(3, "system time of children") + elapsed = structseqfield(4, "elapsed time since an arbitray point in the past") + + if osname == 'posix': def wait(): """ wait() -> (pid, status) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -684,11 +684,17 @@ except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) else: - return space.newtuple([space.newfloat(times[0]), - space.newfloat(times[1]), - space.newfloat(times[2]), - space.newfloat(times[3]), - space.newfloat(times[4])]) + w_keywords = space.newdict() + w_tuple = space.newtuple([space.newfloat(times[0]), + space.newfloat(times[1]), + space.newfloat(times[2]), + space.newfloat(times[3]), + space.newfloat(times[4])]) + + w_times_result = space.getattr(space.getbuiltinmodule(os.name), + space.newtext('times_result')) + return space.call_function(w_times_result, w_tuple, w_keywords) + @unwrap_spec(command='fsencode') def system(space, command): From pypy.commits at gmail.com Wed Nov 15 10:17:34 2017 From: pypy.commits at gmail.com (thisch) Date: Wed, 15 Nov 2017 07:17:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Improve test for posix.times() Message-ID: <5a0c5a8e.88acdf0a.4f669.fa2a@mx.google.com> Author: 
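The visible effect of the change is that os.times() keeps its old 5-tuple behaviour while also exposing named fields. A quick sketch of what the new tests check, valid on Python 3.3+ (the actual values are machine-dependent):

    import os

    t = os.times()
    user, system, cuser, csystem, elapsed = t   # old tuple unpacking still works
    assert t.user == user
    assert t.children_system == csystem
    assert t.elapsed == elapsed
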
Thomas Hisch Branch: py3.5 Changeset: r93041:898194d1902c Date: 2017-11-13 23:44 +0100 http://bitbucket.org/pypy/pypy/changeset/898194d1902c/ Log: Improve test for posix.times() Test that posix.times() returns a times_result object. diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -378,16 +378,21 @@ def test_times(self): """ - posix.times() should return a five-tuple giving float-representations - (seconds, effectively) of the four fields from the underlying struct - tms and the return value. + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from + the underlying struct tms and the return value. """ result = self.posix.times() - assert isinstance(result, tuple) + assert isinstance(self.posix.times(), self.posix.times_result) + assert isinstance(self.posix.times(), tuple) assert len(result) == 5 for value in result: assert isinstance(value, float) - + assert isinstance(result.user, float) + assert isinstance(result.system, float) + assert isinstance(result.children_user, float) + assert isinstance(result.children_system, float) + assert isinstance(result.elapsed, float) def test_strerror(self): assert isinstance(self.posix.strerror(0), str) From pypy.commits at gmail.com Wed Nov 15 11:24:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 08:24:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix test to work on PyPy Message-ID: <5a0c6a35.8fa3df0a.b0eb2.5da0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93043:12f1fb4860ac Date: 2017-11-15 16:23 +0000 http://bitbucket.org/pypy/pypy/changeset/12f1fb4860ac/ Log: Fix test to work on PyPy diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -765,12 +765,15 @@ self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, args_e=['self', 'obj'], formatted='(self, obj)') + # platform-dependent on PyPy + default_fd = os.stat.__kwdefaults__['dir_fd'] + self.assertFullArgSpecEquals( os.stat, args_e=['path'], kwonlyargs_e=['dir_fd', 'follow_symlinks'], - kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}, - formatted='(path, *, dir_fd=None, follow_symlinks=True)') + kwonlydefaults_e={'dir_fd': default_fd, 'follow_symlinks': True}, + formatted='(path, *, dir_fd={}, follow_symlinks=True)'.format(default_fd)) @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") From pypy.commits at gmail.com Wed Nov 15 11:44:17 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 15 Nov 2017 08:44:17 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: general progress towards moving more of the infrastructure from runicode towards unicodehelper, which helps us to deal with surrogates nicely Message-ID: <5a0c6ee1.7a94500a.793c7.17fe@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93044:1d6d78e72d50 Date: 2017-11-15 17:43 +0100 http://bitbucket.org/pypy/pypy/changeset/1d6d78e72d50/ Log: general progress towards moving more of the infrastructure from runicode towards unicodehelper, which helps us to deal with surrogates nicely diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,6 +1,7 @@ from pypy.interpreter.error import 
OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import runicode, rutf8 +from rpython.rlib.rarithmetic import r_uint from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -43,6 +44,15 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, encoding, errors) +def combine_flags(one, two): + if one == rutf8.FLAG_ASCII and two == rutf8.FLAG_ASCII: + return rutf8.FLAG_ASCII + elif (one == rutf8.FLAG_HAS_SURROGATES or + two == rutf8.FLAG_HAS_SURROGATES): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + + def _has_surrogate(u): for c in u: if 0xD800 <= ord(c) <= 0xDFFF: @@ -58,25 +68,221 @@ flag = rutf8.FLAG_REGULAR return flag +def hexescape(builder, s, pos, digits, + encoding, errorhandler, message, errors): + chr = 0 + if pos + digits > len(s): + endinpos = pos + while endinpos < len(s) and s[endinpos] in hexdigits: + endinpos += 1 + uuu + res, size, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + try: + chr = r_uint(int(s[pos:pos+digits], 16)) + except ValueError: + aaaa + endinpos = pos + while s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + # when we get here, chr is a 32-bit unicode character + if chr > 0x10ffff: + UUU + message = "illegal Unicode character" + res, pos = errorhandler(errors, encoding, + message, s, pos-2, pos+digits) + builder.append(res) + else: + rutf8.unichr_as_utf8_append(builder, chr, True) + if chr <= 0x7f: + flag = rutf8.FLAG_ASCII + elif 0xd800 <= chr <= 0xdfff: + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + pos += digits + size = 1 + + return pos, size, flag + +def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + flag = rutf8.FLAG_ASCII + builder = StringBuilder(size) + pos = 0 + outsize = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + if ord(ch) > 0x7F: + rutf8.unichr_as_utf8_append(builder, ord(ch)) + flag = combine_flags(rutf8.FLAG_REGULAR, flag) + else: + builder.append(ch) + pos += 1 + outsize += 1 + continue + + # - Escapes + pos += 1 + if pos >= size: + message = "\\ at end of string" + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + newsize, newflag = rutf8.check_utf8(res, True) + outsize + newsize + flag = combine_flags(flag, newflag) + builder.append(res) + continue + + ch = s[pos] + pos += 1 + # \x escapes + if ch == '\n': pass + elif ch == '\\': + builder.append('\\') + outsize += 1 + elif ch == '\'': + builder.append('\'') + outsize += 1 + elif ch == '\"': + builder.append('\"') + outsize += 1 + elif ch == 'b' : + builder.append('\b') + outsize += 1 + elif ch == 'f' : + builder.append('\f') + outsize += 1 + elif ch == 't' : + builder.append('\t') + outsize += 1 + elif ch == 'n' : + builder.append('\n') + outsize += 1 + elif ch == 'r' : + builder.append('\r') + outsize += 1 + elif ch == 'v' : + builder.append('\v') + outsize += 1 + elif ch == 'a' : + builder.append('\a') + outsize += 1 + elif '0' <= ch <= '7': + x = ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + outsize += 1 + if x >= 
0x7F: + rutf8.unichr_as_utf8_append(builder, x) + flag = combine_flags(rutf8.FLAG_REGULAR, flag) + else: + builder.append(chr(x)) + # hex escapes + # \xXX + elif ch == 'x': + digits = 2 + message = "truncated \\xXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \uXXXX + elif ch == 'u': + digits = 4 + message = "truncated \\uXXXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \UXXXXXXXX + elif ch == 'U': + digits = 8 + message = "truncated \\UXXXXXXXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \N{name} + elif ch == 'N' and ud_handler is not None: + message = "malformed \\N character escape" + look = pos + + if look < size and s[look] == '{': + # look for the closing brace + while look < size and s[look] != '}': + look += 1 + if look < size and s[look] == '}': + # found a name. look it up in the unicode database + message = "unknown Unicode character name" + name = s[pos+1:look] + code = ud_handler.call(name) + if code < 0: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize + builder.append(res) + continue + pos = look + 1 + XXX + if code <= MAXUNICODE: + builder.append(UNICHR(code)) + else: + code -= 0x10000L + builder.append(unichr(0xD800 + (code >> 10))) + builder.append(unichr(0xDC00 + (code & 0x03FF))) + else: + YYY + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + AAA + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + builder.append('\\') + builder.append(ch) + outsize += 2 + + return builder.build(), pos, outsize, flag + # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - # XXX pick better length, maybe - # XXX that guy does not belong in runicode (nor in rutf8) - result_u, consumed = runicode.str_decode_unicode_escape( - string, len(string), "strict", - final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, - unicodedata_handler=unicodedata_handler) - # XXX argh. 
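At the application level this helper backs the familiar unicode_escape codec; a short reminder of the behaviour the \x and \u branches above implement (works on both Python 2 and 3):

    data = b"caf\\xe9 \\u2603"            # the escape sequences are literal bytes
    text = data.decode("unicode_escape")
    assert text == u"caf\xe9 \u2603"      # e-acute and a snowman
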
we want each surrogate to be encoded separately - utf8 = result_u.encode('utf8') - if rutf8.first_non_ascii_char(utf8) == -1: - flag = rutf8.FLAG_ASCII - elif _has_surrogate(result_u): - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR - return utf8, len(result_u), flag + result_utf8, consumed, length, flag = str_decode_unicode_escape( + string, "strict", + final=True, + errorhandler=decode_error_handler(space), + ud_handler=unicodedata_handler) + return result_utf8, length, flag def decode_raw_unicode_escape(space, string): # XXX pick better length, maybe @@ -111,8 +317,10 @@ try: length, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: + # convert position into unicode position + lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, - e.pos, e.pos + 1) + lgt, lgt + 1) assert False, "unreachable" return length, flag @@ -131,23 +339,28 @@ # DEPRECATED return (s, check_utf8_or_raise(space, s)) -def utf8_encode_ascii(utf8, utf8len, errors, errorhandler): - if len(utf8) == utf8len: - return utf8 - # No Way At All to emulate the calls to the error handler in - # less than three pages, so better not. - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return runicode.unicode_encode_ascii(u, len(u), errors, w.handle) - -def str_decode_ascii(s, slen, errors, final, errorhandler): +def str_decode_ascii(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, slen, len(s), rutf8.FLAG_ASCII + return s, len(s), len(s), rutf8.FLAG_ASCII except rutf8.CheckError: - w = DecodeWrapper((errorhandler)) - u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) + return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + +def _str_decode_ascii_slowpath(s, errors, final, errorhandler): + i = 0 + res = StringBuilder() + while i < len(s): + ch = s[i] + if ord(ch) > 0x7F: + r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', + s, i, i + 1) + res.append(r) + else: + res.append(ch) + i += 1 + ress = res.build() + lgt, flag = rutf8.check_utf8(ress, True) + return ress, len(s), lgt, flag # XXX wrappers, think about speed @@ -165,21 +378,14 @@ def handle(self, errors, encoding, msg, s, pos, endpos): return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) -# some irregular interfaces -def str_decode_utf8(s, slen, errors, final, errorhandler): - w = DecodeWrapper(errorhandler) - u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, - runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u), _get_flag(u) +#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): +# w = DecodeWrapper(errorhandler) +# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, +# w.handle, +# ud_handler) +# return u.encode('utf8'), pos, len(u), _get_flag(u) -def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): - w = DecodeWrapper(errorhandler) - u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, - w.handle, - ud_handler) - return u.encode('utf8'), pos, len(u), _get_flag(u) - -def setup_new_encoders(encoding): +def setup_new_encoders_legacy(encoding): encoder_name = 'utf8_encode_' + encoding encoder_call_name = 'unicode_encode_' + encoding decoder_name = 'str_decode_' + encoding @@ -200,9 +406,322 @@ globals()[decoder_name] = decoder def setup(): - for encoding in ['utf_7', 
'unicode_escape', 'raw_unicode_escape', + for encoding in ['raw_unicode_escape', 'utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', 'utf_32_be', 'latin_1', 'unicode_internal']: - setup_new_encoders(encoding) + setup_new_encoders_legacy(encoding) setup() + +def utf8_encode_ascii(utf8, errors, errorhandler): + """ Don't be confused - this is a slowpath for errors e.g. "ignore" + or an obscure errorhandler + """ + res = StringBuilder() + i = 0 + pos = 0 + while i < len(utf8): + ch = rutf8.codepoint_at_pos(utf8, i) + if ch >= 0x7F: + msg = "ordinal not in range(128)" + r, newpos = errorhandler(errors, 'ascii', msg, utf8, + pos, pos + 1) + for _ in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + pos = newpos + res.append(r) + else: + res.append(chr(ch)) + i = rutf8.next_codepoint_pos(utf8, i) + pos += 1 + + s = res.build() + return s + +# some irregular interfaces +def str_decode_utf8(s, slen, errors, final, errorhandler): + xxxx + + u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, + runicode.allow_surrogate_by_default) + return u.encode('utf8'), pos, len(u), _get_flag(u) + +# ____________________________________________________________ +# utf-7 + +# Three simple macros defining base-64 + +def _utf7_IS_BASE64(oc): + "Is c a base-64 character?" + c = chr(oc) + return c.isalnum() or c == '+' or c == '/' +def _utf7_TO_BASE64(n): + "Returns the base-64 character of the bottom 6 bits of n" + return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[n & 0x3f] +def _utf7_FROM_BASE64(c): + "given that c is a base-64 character, what is its base-64 value?" + if c >= 'a': + return ord(c) - 71 + elif c >= 'A': + return ord(c) - 65 + elif c >= '0': + return ord(c) + 4 + elif c == '+': + return 62 + else: # c == '/' + return 63 + +def _utf7_DECODE_DIRECT(oc): + return oc <= 127 and oc != ord('+') + +# The UTF-7 encoder treats ASCII characters differently according to +# whether they are Set D, Set O, Whitespace, or special (i.e. none of +# the above). See RFC2152. This array identifies these different +# sets: +# 0 : "Set D" +# alphanumeric and '(),-./:? +# 1 : "Set O" +# !"#$%&*;<=>@[]^_`{|} +# 2 : "whitespace" +# ht nl cr sp +# 3 : special (must be base64 encoded) +# everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127) + +utf7_category = [ +# nul soh stx etx eot enq ack bel bs ht nl vt np cr so si + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, +# dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +# sp ! " # $ % & ' ( ) * + , - . / + 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, +# 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, +# @ A B C D E F G H I J K L M N O + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# P Q R S T U V W X Y Z [ \ ] ^ _ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, +# ` a b c d e f g h i j k l m n o + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# p q r s t u v w x y z { | } ~ del + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, +] + +# ENCODE_DIRECT: this character should be encoded as itself. The +# answer depends on whether we are encoding set O as itself, and also +# on whether we are encoding whitespace as itself. RFC2152 makes it +# clear that the answers to these questions vary between +# applications, so this code needs to be flexible. 
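The base-64 helpers and the category table above reimplement, for RPython, what the stdlib utf-7 codec already does at app level, which makes the stdlib handy as a behavioural reference:

    s = u"plus + sign and caf\xe9"
    wire = s.encode("utf-7")       # '+' is emitted as '+-', the e-acute as a base64 run
    assert wire.decode("utf-7") == s
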
+ +def _utf7_ENCODE_DIRECT(oc, directO, directWS): + return(oc < 128 and oc > 0 and + (utf7_category[oc] == 0 or + (directWS and utf7_category[oc] == 2) or + (directO and utf7_category[oc] == 1))) + +def _utf7_ENCODE_CHAR(result, oc, base64bits, base64buffer): + if oc >= 0x10000: + # code first surrogate + base64bits += 16 + base64buffer = (base64buffer << 16) | 0xd800 | ((oc-0x10000) >> 10) + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + # prepare second surrogate + oc = 0xDC00 | ((oc-0x10000) & 0x3FF) + base64bits += 16 + base64buffer = (base64buffer << 16) | oc + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + return base64bits, base64buffer + +def str_decode_utf_7(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + inShift = False + base64bits = 0 + base64buffer = 0 + surrogate = 0 + outsize = 0 + + result = StringBuilder(size) + pos = 0 + shiftOutStartPos = 0 + flag = rutf8.FLAG_ASCII + startinpos = 0 + while pos < size: + ch = s[pos] + + if inShift: # in a base-64 section + if _utf7_IS_BASE64(ord(ch)): #consume a base-64 character + base64buffer = (base64buffer << 6) | _utf7_FROM_BASE64(ch) + base64bits += 6 + pos += 1 + + if base64bits >= 16: + # enough bits for a UTF-16 value + outCh = base64buffer >> (base64bits - 16) + base64bits -= 16 + base64buffer &= (1 << base64bits) - 1 # clear high bits + assert outCh <= 0xffff + if surrogate: + # expecting a second surrogate + if outCh >= 0xDC00 and outCh <= 0xDFFF: + xxxx + result.append( + UNICHR((((surrogate & 0x3FF)<<10) | + (outCh & 0x3FF)) + 0x10000)) + surrogate = 0 + continue + else: + YYYY + result.append(unichr(surrogate)) + surrogate = 0 + # Not done with outCh: falls back to next line + if outCh >= 0xD800 and outCh <= 0xDBFF: + # first surrogate + surrogate = outCh + else: + flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) + outsize += 1 + rutf8.unichr_as_utf8_append(result, outCh, True) + + else: + # now leaving a base-64 section + inShift = False + + if base64bits > 0: # left-over bits + if base64bits >= 6: + # We've seen at least one base-64 character + aaa + pos += 1 + msg = "partial character in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + result.append(res) + continue + else: + # Some bits remain; they should be zero + if base64buffer != 0: + bbb + pos += 1 + msg = "non-zero padding bits in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + result.append(res) + continue + + if surrogate and _utf7_DECODE_DIRECT(ord(ch)): + outsize += 1 + flag = rutf8.FLAG_HAS_SURROGATES + rutf8.unichr_as_utf8_append(result, surrogate, True) + surrogate = 0 + + if ch == '-': + # '-' is absorbed; other terminating characters are + # preserved + pos += 1 + + elif ch == '+': + startinpos = pos + pos += 1 # consume '+' + if pos < size and s[pos] == '-': # '+-' encodes '+' + pos += 1 + result.append('+') + outsize += 1 + else: # begin base64-encoded section + inShift = 1 + surrogate = 0 + shiftOutStartPos = result.getlength() + base64bits = 0 + base64buffer = 0 + + elif _utf7_DECODE_DIRECT(ord(ch)): # character decodes at itself + xxx + result.append(unichr(ord(ch))) + pos += 1 + else: + yyy + startinpos = pos + pos += 1 + msg = "unexpected special character" + res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + result.append(res) + + # end of string + final_length = 
result.getlength() + if inShift and final: # in shift sequence, no more to follow + # if we're in an inconsistent state, that's an error + inShift = 0 + if (surrogate or + base64bits >= 6 or + (base64bits > 0 and base64buffer != 0)): + msg = "unterminated shift sequence" + xxxx + res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) + result.append(res) + final_length = result.getlength() + elif inShift: + pos = startinpos + final_length = shiftOutStartPos # back off output + + assert final_length >= 0 + return result.build()[:final_length], pos, outsize, flag + +def utf8_encode_utf_7(s, errors, errorhandler=None): + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + + encodeSetO = encodeWhiteSpace = False + + inShift = False + base64bits = 0 + base64buffer = 0 + + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if not inShift: + if oc == ord('+'): + result.append('+-') + elif _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + result.append(chr(oc)) + else: + result.append('+') + inShift = True + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + else: + if _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + # shifting out + if base64bits: # output remaining bits + result.append(_utf7_TO_BASE64(base64buffer << (6-base64bits))) + base64buffer = 0 + base64bits = 0 + + inShift = False + ## Characters not in the BASE64 set implicitly unshift the + ## sequence so no '-' is required, except if the character is + ## itself a '-' + if _utf7_IS_BASE64(oc) or oc == ord('-'): + result.append('-') + result.append(chr(oc)) + else: + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + pos = rutf8.next_codepoint_pos(s, pos) + + if base64bits: + result.append(_utf7_TO_BASE64(base64buffer << (6 - base64bits))) + if inShift: + result.append('-') + + return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -37,6 +37,7 @@ if decode: w_cls = space.w_UnicodeDecodeError w_input = space.newbytes(input) + length = len(input) else: w_cls = space.w_UnicodeEncodeError length, flag = rutf8.check_utf8(input, allow_surrogates=True) @@ -61,17 +62,13 @@ w_replace, w_newpos = space.fixedview(w_res, 2) newpos = space.int_w(w_newpos) if newpos < 0: - newpos = len(input) + newpos - if newpos < 0 or newpos > len(input): + newpos = length + newpos + if newpos < 0 or newpos > length: raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) w_replace = space.convert_to_w_unicode(w_replace) - replace = w_replace._utf8.decode('utf8') - if decode: - return replace, newpos - else: - return replace, None, newpos + return w_replace._utf8, newpos return call_errorhandler def make_decode_errorhandler(self, space): @@ -384,8 +381,7 @@ func = getattr(unicodehelper, rname) utf8len = w_arg._length # XXX deal with func() returning length or not - result = func(w_arg._utf8, utf8len, - errors, state.encode_error_handler) + result = func(w_arg._utf8, errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(utf8len)]) wrap_encoder.func_name = rname globals()[name] = wrap_encoder @@ -403,7 +399,7 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = 
getattr(unicodehelper, rname) - result, consumed, length, flag = func(string, len(string), errors, + result, consumed, length, flag = func(string, errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(result, length, flag), space.newint(consumed)]) @@ -476,8 +472,6 @@ try: lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: - # XXX do the way around runicode - we can optimize it later if we - # decide we care about obscure cases res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), @@ -695,7 +689,7 @@ unicode_name_handler = state.get_unicodedata_handler(space) result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( - string, len(string), errors, + string, errors, final, state.decode_error_handler, unicode_name_handler) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -9,7 +9,6 @@ from rpython.rlib.rstring import ( StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith, endswith) -from rpython.rlib.runicode import make_unicode_escape_function from rpython.rlib import rutf8, jit from pypy.interpreter import unicodehelper @@ -48,9 +47,16 @@ else: assert flag == rutf8.FLAG_REGULAR self._index_storage = rutf8.null_storage() + # XXX checking, remove before any performance measurments + # ifdef not_running_in_benchmark lgt, flag_check = rutf8.check_utf8(utf8str, True) assert lgt == length - assert flag == flag_check + if flag_check == rutf8.FLAG_ASCII: + # there are cases where we copy part of REULAR that happens + # to be ascii + assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) + else: + assert flag == flag_check # the storage can be one of: # - null, unicode with no surrogates # - rutf8.UTF8_HAS_SURROGATES @@ -351,7 +357,7 @@ elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) return W_UnicodeObject(builder.build(), self._length, flag) @@ -376,7 +382,7 @@ else: ch = unicodedb.tolower(ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -402,7 +408,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) - flag = self._combine_flags(flag, w_newval._get_flag()) + flag = unicodehelper.combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -411,7 +417,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -535,7 +541,7 @@ while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? 
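The (replacement, newpos) pairs threaded through call_errorhandler are the RPython side of the app-level codecs error-handler protocol, which is easiest to see from plain Python:

    import codecs

    def to_question_mark(exc):
        # an error handler returns (replacement, position to resume at)
        return (u"?", exc.end)

    codecs.register_error("question", to_question_mark)
    assert b"ab\xffcd".decode("ascii", "question") == u"ab?cd"
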
pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -623,15 +629,6 @@ return True return endswith(value, prefix, start, end) - @staticmethod - def _combine_flags(self_flag, other_flag): - if self_flag == rutf8.FLAG_ASCII and other_flag == rutf8.FLAG_ASCII: - return rutf8.FLAG_ASCII - elif (self_flag == rutf8.FLAG_HAS_SURROGATES or - other_flag == rutf8.FLAG_HAS_SURROGATES): - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - def _get_flag(self): if self.is_ascii(): return rutf8.FLAG_ASCII @@ -646,7 +643,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - flag = self._combine_flags(self._get_flag(), w_other._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, self._len() + w_other._len(), flag) @@ -671,7 +668,7 @@ # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) - flag = self._combine_flags(flag, w_u._get_flag()) + flag = unicodehelper.combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -723,7 +720,7 @@ uchar = rutf8.codepoint_at_pos(value, i) uchar = unicodedb.toupper(uchar) if uchar >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) rutf8.unichr_as_utf8_append(builder, uchar) return W_UnicodeObject(builder.build(), self._length, flag) @@ -837,14 +834,14 @@ ch = unicodedb.toupper(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) @@ -930,7 +927,7 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - flag = self._combine_flags(self._get_flag(), w_by._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) return W_UnicodeObject(res, newlength, flag) @@ -1052,7 +1049,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") - flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -1071,7 +1068,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") - flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -452,6 +452,13 @@ ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) )))) +def unichr_to_flag(ch): 
+ if ch <= 0x7F: + return FLAG_ASCII + elif 0xD800 <= ch <= 0xDFFF: + return FLAG_HAS_SURROGATES + return FLAG_REGULAR + FLAG_REGULAR = 0 FLAG_HAS_SURROGATES = 1 FLAG_ASCII = 2 From pypy.commits at gmail.com Wed Nov 15 12:27:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 09:27:40 -0800 (PST) Subject: [pypy-commit] pypy default: fix test_whatsnew Message-ID: <5a0c790c.c78c1c0a.f0d32.730d@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93045:95e0fdd7cd86 Date: 2017-11-15 18:26 +0100 http://bitbucket.org/pypy/pypy/changeset/95e0fdd7cd86/ Log: fix test_whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -20,3 +20,9 @@ .. branch: run-extra-tests Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) From pypy.commits at gmail.com Wed Nov 15 13:27:28 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 15 Nov 2017 10:27:28 -0800 (PST) Subject: [pypy-commit] pypy rpython-20: start a branch to play with stronger type guarantees Message-ID: <5a0c8710.8dd71c0a.4e787.9218@mx.google.com> Author: fijal Branch: rpython-20 Changeset: r93046:5f87d65c7f82 Date: 2017-11-15 19:26 +0100 http://bitbucket.org/pypy/pypy/changeset/5f87d65c7f82/ Log: start a branch to play with stronger type guarantees diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py --- a/rpython/annotator/model.py +++ b/rpython/annotator/model.py @@ -47,19 +47,33 @@ allow_int_to_float = True TLS = State() +def compare_dict(d1, d2, ommit): + for k, v in d1.iteritems(): + if k in ommit: + continue + if k not in d2 or v != d2[k]: + return False + for k, v in d2.iteritems(): + if k in ommit: + continue + if k not in d1: # don't need to compare again + return False + return True + class SomeObject(object): """The set of all objects. 
Each instance stands for an arbitrary object about which nothing is known.""" __metaclass__ = extendabletype immutable = False knowntype = object + can_union = True def __init__(self): assert type(self) is not SomeObject def __eq__(self, other): return (self.__class__ is other.__class__ and - self.__dict__ == other.__dict__) + compare_dict(self.__dict__, other.__dict__, ('can_union',))) def __ne__(self, other): return not (self == other) @@ -74,7 +88,7 @@ else: reprdict[self] = True try: - items = self.__dict__.items() + items = [x for x in self.__dict__.items() if x[0] != 'can_union'] items.sort() args = [] for k, v in items: @@ -269,11 +283,10 @@ d1 = self.__dict__ d2 = other.__dict__ if not TLS.check_str_without_nul: - d1 = d1.copy() - d1['no_nul'] = 0 - d2 = d2.copy() - d2['no_nul'] = 0 - return d1 == d2 + ommit = ('no_nul', 'can_union') + else: + ommit = () + return compare_dict(d1, d2, ommit) def nonnoneify(self): return self.__class__(can_be_None=False, no_nul=self.no_nul) @@ -341,11 +354,8 @@ return False if not self.listdef.same_as(other.listdef): return False - selfdic = self.__dict__.copy() - otherdic = other.__dict__.copy() - del selfdic['listdef'] - del otherdic['listdef'] - return selfdic == otherdic + return compare_dict(self.__dict__, other.__dict__, + ('listdef', 'can_union')) def can_be_none(self): return True @@ -383,11 +393,8 @@ return False if not self.dictdef.same_as(other.dictdef): return False - selfdic = self.__dict__.copy() - otherdic = other.__dict__.copy() - del selfdic['dictdef'] - del otherdic['dictdef'] - return selfdic == otherdic + return compare_dict(self.__dict__, other.__dict__, + ('dictdef', 'can_union')) def can_be_none(self): return True @@ -755,8 +762,15 @@ if s1 == s2: # Most pair(...).union() methods deal incorrectly with that case # when constants are involved. 
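compare_dict() above is just "equal except for the bookkeeping keys"; a stand-alone rendering of the same check (using .items() instead of the RPython iteritems, and keeping the original ommit spelling) behaves like this:

    def compare_dict(d1, d2, ommit=()):
        for k, v in d1.items():
            if k in ommit:
                continue
            if k not in d2 or d2[k] != v:
                return False
        return all(k in d1 for k in d2 if k not in ommit)

    assert compare_dict({'knowntype': int, 'can_union': False},
                        {'knowntype': int}, ommit=('can_union',))
    assert not compare_dict({'knowntype': int}, {'knowntype': float})
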
- return s1 - return pair(s1, s2).union() + r = s1 + else: + r = pair(s1, s2).union() + if not s1.can_union and not s1 == r: + raise AnnotatorError("Merging %s and %s forbidden" % (s2, s1)) + if not s2.can_union and not s2 == r: + raise AnnotatorError("Merging %s and %s forbidden" % (s1, s2)) + return r + finally: TLS.no_side_effects_in_union -= 1 @@ -773,6 +787,17 @@ # See comment in union() above if s1 != s2: s1 = pair(s1, s2).union() + for i, s in enumerate(somevalues): + if not s.can_union and not s == s1: + l = [] + for j, _s in enumerate(somevalues): + if i == j: + l.append("* " + repr(_s)) + else: + l.append(" " + repr(_s)) + allargs = "\n".join(l) + raise AnnotatorError("Merging:\n%s\nwill produce %s, * marks strict" + " which cannot be generalized" % (allargs, s1)) return s1 diff --git a/rpython/annotator/test/test_strongly_typed.py b/rpython/annotator/test/test_strongly_typed.py new file mode 100644 --- /dev/null +++ b/rpython/annotator/test/test_strongly_typed.py @@ -0,0 +1,40 @@ + +import py + +from rpython.conftest import option + +from rpython.annotator import model +from rpython.annotator.annrpython import RPythonAnnotator as _RPythonAnnotator + + +class TestAnnotateTestCase: + class RPythonAnnotator(_RPythonAnnotator): + def build_types(self, *args): + s = _RPythonAnnotator.build_types(self, *args) + self.validate() + if option.view: + self.translator.view() + return s + + def build_types(self, func, types): + a = self.RPythonAnnotator() + return a.build_types(func, types) + + def test_simple(self): + def f(a): + return a + + s = model.SomeInteger() + s.can_union = False + self.build_types(f, [s]) + assert s == model.SomeInteger() + + def test_generalize_boom(self): + def f(i): + if i % 15 == 0: + return f(1.5) + return i + + s = model.SomeInteger() + s.can_union = False + py.test.raises(model.AnnotatorError, self.build_types, f, [s]) diff --git a/rpython/doc/signatures.rst b/rpython/doc/signatures.rst new file mode 100644 --- /dev/null +++ b/rpython/doc/signatures.rst @@ -0,0 +1,54 @@ + +Basic types:: + + int - signed machine size integer + r_uint - unsigned machine size integer + r_long/r_ulong/r_longlong/r_ulonglong - various integers + char - single character (byte) + bytes - immutable array of chars + bytes? - nullable bytes + float - double-sized IEEE floating point + +Low level types: + + ll.UCHAR + ll.INT + ... 
+ ll.Array(xxx) + ll.Struct(xxx) + ll.GcStruct(xxx) + ll.GcArray(xxx) + +Container types:: + + list(X) - resizable list of X + array(X) - non-resizable list of X + dict(X, Y) - dict of X keys and Y values + tuple(A, B, C) - tuple of 3 items, A, B, C + list?(X) - nullable list, array or dict + +Classes:: + + class A(object): + _rpython_ = """ + class foobar.A # <- namespace declaration for type name + + a: int + b: list(int) + c: array(int) + """ + +PBCs:: + + space = rpython_pbc("space.ObjSpace", space) - registers PBC under the name "space.ObjSpace", + to be used in signatures + +Examples of a signature:: + + @rpython("int -> int") + def f(a): + return a + + @rpython("space.ObjSpace, int, float -> bytes") + def f(space, i, f): + return space.str_w(space.newbytes(str(i))) From pypy.commits at gmail.com Wed Nov 15 17:09:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 14:09:04 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove obsolete PyPy-specific changes Message-ID: <5a0cbb00.178fdf0a.93bfd.ed98@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93047:4fe92f1fbcbf Date: 2017-11-15 20:40 +0000 http://bitbucket.org/pypy/pypy/changeset/4fe92f1fbcbf/ Log: Remove obsolete PyPy-specific changes diff --git a/lib-python/3/test/test_cmd_line_script.py b/lib-python/3/test/test_cmd_line_script.py --- a/lib-python/3/test/test_cmd_line_script.py +++ b/lib-python/3/test/test_cmd_line_script.py @@ -43,11 +43,7 @@ _loader = __loader__ if __loader__ is BuiltinImporter else type(__loader__) print('__loader__==%a' % _loader) print('__file__==%a' % __file__) -if __cached__ is not None: - # XXX: test_script_compiled on PyPy - assertEqual(__file__, __cached__) - if not __cached__.endswith(('pyc', 'pyo')): - raise AssertionError('has __cached__ but not compiled') +print('__cached__==%a' % __cached__) print('__package__==%r' % __package__) # Check PEP 451 details import os.path @@ -239,9 +235,8 @@ def test_basic_script(self): with support.temp_dir() as script_dir: script_name = _make_test_script(script_dir, 'script') - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(script_name, script_name, script_name, - script_dir, package, + script_dir, None, importlib.machinery.SourceFileLoader) def test_script_compiled(self): @@ -250,9 +245,8 @@ py_compile.compile(script_name, doraise=True) os.remove(script_name) pyc_file = support.make_legacy_pyc(script_name) - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(pyc_file, pyc_file, - pyc_file, script_dir, package, + pyc_file, script_dir, None, importlib.machinery.SourcelessFileLoader) def test_directory(self): From pypy.commits at gmail.com Wed Nov 15 17:09:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 14:09:06 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Always initialise __main__.__loader__ and __main__.__builtins__ (CPython does, it, don't ask me why) Message-ID: <5a0cbb02.88acdf0a.4f669.a427@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93048:6fc0a7040472 Date: 2017-11-15 22:08 +0000 http://bitbucket.org/pypy/pypy/changeset/6fc0a7040472/ Log: Always initialise __main__.__loader__ and __main__.__builtins__ (CPython does, it, don't ask me why) diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -579,6 +579,8 @@ __pypy__.save_module_content_for_future_reload(sys) mainmodule = type(sys)('__main__') + mainmodule.__loader__ = sys.__loader__ + 
mainmodule.__builtins__ = os.__builtins__ sys.modules['__main__'] = mainmodule if not no_site: From pypy.commits at gmail.com Wed Nov 15 22:57:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:57:59 -0800 (PST) Subject: [pypy-commit] pypy default: Kill confusing function callback case in emulate_pbc_call() Message-ID: <5a0d0cc7.3bb0df0a.d4ca0.17f2@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93050:203414415a39 Date: 2016-11-20 17:24 +0000 http://bitbucket.org/pypy/pypy/changeset/203414415a39/ Log: Kill confusing function callback case in emulate_pbc_call() diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -313,17 +313,12 @@ parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a dictionary of call + # self.notify[graph.returnblock] is a set of call # points to this func which triggers a reflow whenever the # return block of this graph has been analysed. - callpositions = self.notify.setdefault(graph.returnblock, {}) + returnpositions = self.notify.setdefault(graph.returnblock, set()) if whence is not None: - if callable(whence): - def callback(): - whence(self, graph) - else: - callback = whence - callpositions[callback] = True + returnpositions.add(whence) # generalize the function's input arguments self.addpendingblock(graph, graph.startblock, inputcells) @@ -574,12 +569,8 @@ self.follow_link(graph, link, constraints) if block in self.notify: - # reflow from certain positions when this block is done - for callback in self.notify[block]: - if isinstance(callback, tuple): - self.reflowfromposition(callback) # callback is a position - else: - callback() + for position in self.notify[block]: + self.reflowfromposition(position) def follow_link(self, graph, link, constraints): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -547,10 +547,8 @@ (position_key, "first") and (position_key, "second"). In general, "unique_key" should somehow uniquely identify where - the call is in the source code, and "callback" can be either a - position_key to reflow from when we see more general results, - or a real callback function that will be called with arguments - # "(annotator, called_graph)" whenever the result is generalized. + the call is in the source code, and "callback" is a + position_key to reflow from when we see more general results. "replace" can be set to a list of old unique_key values to forget now, because the given "unique_key" replaces them. 
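After the annrpython.py change above, self.notify maps a return block to a plain set of positions, and finishing the block simply reflows each of them. A toy version of that bookkeeping, with names invented for the sketch:

    notify = {}

    def register_call(returnblock, position):
        notify.setdefault(returnblock, set()).add(position)

    def block_done(returnblock, reflow):
        for position in notify.get(returnblock, ()):
            reflow(position)

    seen = []
    register_call("ret_block", ("caller_graph", "block7", 3))
    block_done("ret_block", reflow=seen.append)
    assert seen == [("caller_graph", "block7", 3)]
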
diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -2141,28 +2141,6 @@ assert (fdesc.get_s_signatures((2, (), False)) == [([someint,someint],someint)]) - def test_emulated_pbc_call_callback(self): - def f(a,b): - return a + b - from rpython.annotator import annrpython - a = annrpython.RPythonAnnotator() - from rpython.annotator import model as annmodel - - memo = [] - def callb(ann, graph): - memo.append(annmodel.SomeInteger() == ann.binding(graph.getreturnvar())) - - s_f = a.bookkeeper.immutablevalue(f) - s = a.bookkeeper.emulate_pbc_call('f', s_f, [annmodel.SomeInteger(), annmodel.SomeInteger()], - callback=callb) - assert s == annmodel.SomeImpossibleValue() - a.complete() - - assert a.binding(graphof(a, f).getreturnvar()).knowntype == int - assert len(memo) >= 1 - for t in memo: - assert t - def test_iterator_union(self): def it(d): return d.iteritems() From pypy.commits at gmail.com Wed Nov 15 22:57:57 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:57:57 -0800 (PST) Subject: [pypy-commit] pypy default: Simplify code Message-ID: <5a0d0cc5.43aadf0a.d1e02.0e7f@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93049:5e549a04ab94 Date: 2016-11-20 16:57 +0000 http://bitbucket.org/pypy/pypy/changeset/5e549a04ab94/ Log: Simplify code diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -1,5 +1,5 @@ from rpython.annotator.model import ( - s_ImpossibleValue, SomeInteger, s_Bool, union) + s_ImpossibleValue, SomeInteger, s_Bool, union, AnnotatorError) from rpython.annotator.listdef import ListItem from rpython.rlib.objectmodel import compute_hash @@ -51,23 +51,19 @@ s_key = self.s_value - def check_eqfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert s_Bool.contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myeq, self.s_rdict_eqfn, [s_key, s_key], replace=replace_othereq) + if not s_Bool.contains(s): + raise AnnotatorError( "the custom eq function of an r_dict must return a boolean" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myeq, self.s_rdict_eqfn, [s_key, s_key], - replace=replace_othereq, - callback = check_eqfn) - def check_hashfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert SomeInteger().contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myhash, self.s_rdict_hashfn, [s_key], replace=replace_otherhash) + if not SomeInteger().contains(s): + raise AnnotatorError( "the custom hash function of an r_dict must return an integer" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myhash, self.s_rdict_hashfn, [s_key], - replace=replace_otherhash, - callback = check_hashfn) class DictValue(ListItem): From pypy.commits at gmail.com Wed Nov 15 22:58:02 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:58:02 -0800 (PST) Subject: [pypy-commit] pypy default: small cleanup Message-ID: <5a0d0cca.53d71c0a.2ddc.2e50@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93051:08eace48ed36 Date: 2016-11-20 22:11 +0000 http://bitbucket.org/pypy/pypy/changeset/08eace48ed36/ Log: small cleanup diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -309,15 +309,14 @@ #___ interface for annotator.bookkeeper _______ def recursivecall(self, graph, 
whence, inputcells): - if isinstance(whence, tuple): + if whence is not None: parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a set of call - # points to this func which triggers a reflow whenever the - # return block of this graph has been analysed. - returnpositions = self.notify.setdefault(graph.returnblock, set()) - if whence is not None: + # self.notify[graph.returnblock] is a set of call + # points to this func which triggers a reflow whenever the + # return block of this graph has been analysed. + returnpositions = self.notify.setdefault(graph.returnblock, set()) returnpositions.add(whence) # generalize the function's input arguments From pypy.commits at gmail.com Thu Nov 16 00:01:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 21:01:04 -0800 (PST) Subject: [pypy-commit] pypy default: Clean up rerased: split interp-level ErasingPairIdentity from translator-level IdentityDesc Message-ID: <5a0d1b90.ddb1df0a.bddec.10dc@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93052:4c883891e3d7 Date: 2016-11-21 19:46 +0000 http://bitbucket.org/pypy/pypy/changeset/4c883891e3d7/ Log: Clean up rerased: split interp-level ErasingPairIdentity from translator-level IdentityDesc diff --git a/rpython/rlib/rerased.py b/rpython/rlib/rerased.py --- a/rpython/rlib/rerased.py +++ b/rpython/rlib/rerased.py @@ -15,6 +15,8 @@ """ import sys +from collections import defaultdict + from rpython.annotator import model as annmodel from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.llannotation import lltype_to_annotation @@ -48,34 +50,29 @@ def __deepcopy__(self, memo): return self - def _getdict(self, bk): - try: - dict = bk._erasing_pairs_tunnel - except AttributeError: - dict = bk._erasing_pairs_tunnel = {} - return dict +class IdentityDesc(object): + def __init__(self, bookkeeper): + self.bookkeeper = bookkeeper + self.s_input = annmodel.s_ImpossibleValue + self.reflowpositions = {} - def enter_tunnel(self, bookkeeper, s_obj): - dict = self._getdict(bookkeeper) - s_previousobj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - s_obj = annmodel.unionof(s_previousobj, s_obj) - if s_obj != s_previousobj: - dict[self] = (s_obj, reflowpositions) - for position in reflowpositions: - bookkeeper.annotator.reflowfromposition(position) + def enter_tunnel(self, s_obj): + s_obj = annmodel.unionof(self.s_input, s_obj) + if s_obj != self.s_input: + self.s_input = s_obj + for position in self.reflowpositions: + self.bookkeeper.annotator.reflowfromposition(position) - def leave_tunnel(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - reflowpositions[bookkeeper.position_key] = True - return s_obj + def leave_tunnel(self): + self.reflowpositions[self.bookkeeper.position_key] = True + return self.s_input - def get_input_annotation(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, _ = dict[self] - return s_obj +def _get_desc(bk, identity): + try: + descs = bk._erasing_pairs_descs + except AttributeError: + descs = bk._erasing_pairs_descs = defaultdict(lambda: IdentityDesc(bk)) + return descs[identity] _identity_for_ints = ErasingPairIdentity("int") @@ -94,21 +91,23 @@ _about_ = erase def compute_result_annotation(self, s_obj): - identity.enter_tunnel(self.bookkeeper, s_obj) + desc = _get_desc(self.bookkeeper, 
identity) + desc.enter_tunnel(s_obj) return _some_erased() def specialize_call(self, hop): bk = hop.rtyper.annotator.bookkeeper - s_obj = identity.get_input_annotation(bk) + desc = _get_desc(bk, identity) hop.exception_cannot_occur() - return _rtype_erase(hop, s_obj) + return _rtype_erase(hop, desc.s_input) class Entry(ExtRegistryEntry): _about_ = unerase def compute_result_annotation(self, s_obj): assert _some_erased().contains(s_obj) - return identity.leave_tunnel(self.bookkeeper) + desc = _get_desc(self.bookkeeper, identity) + return desc.leave_tunnel() def specialize_call(self, hop): hop.exception_cannot_occur() @@ -130,6 +129,7 @@ def __init__(self, x, identity): self._x = x self._identity = identity + def __repr__(self): return "Erased(%r, %r)" % (self._x, self._identity) @@ -140,7 +140,7 @@ assert config.translation.taggedpointers, "need to enable tagged pointers to use erase_int" return lltype.cast_int_to_ptr(r_self.lowleveltype, value._x * 2 + 1) bk = r_self.rtyper.annotator.bookkeeper - s_obj = value._identity.get_input_annotation(bk) + s_obj = _get_desc(bk, value._identity).s_input r_obj = r_self.rtyper.getrepr(s_obj) if r_obj.lowleveltype is lltype.Void: return lltype.nullptr(r_self.lowleveltype.TO) @@ -182,9 +182,9 @@ _type_ = Erased def compute_annotation(self): - identity = self.instance._identity + desc = _get_desc(self.bookkeeper, self.instance._identity) s_obj = self.bookkeeper.immutablevalue(self.instance._x) - identity.enter_tunnel(self.bookkeeper, s_obj) + desc.enter_tunnel(s_obj) return _some_erased() # annotation and rtyping support From pypy.commits at gmail.com Thu Nov 16 04:35:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 01:35:07 -0800 (PST) Subject: [pypy-commit] pypy default: cherry-pick a small part of the continulet-no-frame-loop branch and make stack() available to all tests; fix test_f_back when run with -A Message-ID: <5a0d5bcb.7996df0a.4610b.47df@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93053:1cac28ee833b Date: 2017-11-16 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/1cac28ee833b/ Log: cherry-pick a small part of the continulet-no-frame-loop branch and make stack() available to all tests; fix test_f_back when run with -A diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -339,26 +368,8 @@ def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() 
- while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - #print res - return res - def bar(c): assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) From pypy.commits at gmail.com Thu Nov 16 06:43:22 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 16 Nov 2017 03:43:22 -0800 (PST) Subject: [pypy-commit] cffi default: Issue #343 [patch by david naylor] Message-ID: <5a0d79da.7996df0a.4610b.6ccd@mx.google.com> Author: Armin Rigo Branch: Changeset: r3051:d5661822dee4 Date: 2017-11-16 12:42 +0100 http://bitbucket.org/cffi/cffi/changeset/d5661822dee4/ Log: Issue #343 [patch by david naylor] Fix test_recompiler for libc++ diff --git a/testing/cffi1/test_recompiler.py b/testing/cffi1/test_recompiler.py --- a/testing/cffi1/test_recompiler.py +++ b/testing/cffi1/test_recompiler.py @@ -2270,7 +2270,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif From pypy.commits at gmail.com Thu Nov 16 06:43:48 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 16 Nov 2017 03:43:48 -0800 (PST) Subject: [pypy-commit] pypy default: CFFI Issue #343 [patch by david naylor] Message-ID: <5a0d79f4.95091c0a.dec4e.aef8@mx.google.com> Author: Armin Rigo Branch: Changeset: r93054:34aff140932c Date: 2017-11-16 12:43 +0100 http://bitbucket.org/pypy/pypy/changeset/34aff140932c/ Log: CFFI Issue #343 [patch by david naylor] Fix test_recompiler for libc++ diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -2271,7 +2271,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif From pypy.commits at gmail.com Thu Nov 16 10:21:27 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 16 Nov 2017 07:21:27 -0800 (PST) Subject: [pypy-commit] pypy refactor-PyFloat_FromString: refactor possible recursion in PyFloat_FromString Message-ID: <5a0dacf7.22a8df0a.e5ef.a448@mx.google.com> Author: Matti Picus Branch: refactor-PyFloat_FromString Changeset: r93055:57019e77c377 Date: 2017-11-16 17:19 +0200 http://bitbucket.org/pypy/pypy/changeset/57019e77c377/ Log: refactor possible recursion in PyFloat_FromString diff --git a/pypy/module/cpyext/floatobject.py b/pypy/module/cpyext/floatobject.py --- a/pypy/module/cpyext/floatobject.py +++ b/pypy/module/cpyext/floatobject.py @@ -1,12 +1,12 @@ from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib import rarithmetic from pypy.module.cpyext.api import (PyObjectFields, bootstrap_function, cpython_struct, CANNOT_FAIL, cpython_api, PyObject, build_type_checkers, CONST_STRING) from pypy.module.cpyext.pyobject import ( make_typedescr, track_reference, from_ref) -from pypy.interpreter.error import OperationError from rpython.rlib.rstruct import runpack -from pypy.objspace.std.floatobject import W_FloatObject +from 
pypy.objspace.std.floatobject import W_FloatObject, basestring_to_float PyFloatObjectStruct = lltype.ForwardReference() PyFloatObject = lltype.Ptr(PyFloatObjectStruct) @@ -66,7 +66,10 @@ """Create a PyFloatObject object based on the string value in str, or NULL on failure. The pend argument is ignored. It remains only for backward compatibility.""" - return space.call_function(space.w_float, w_obj) + # avoid space.call_function(space.w_float, w_obj) since PyFloat_FromString + # could be type.tp_as_number.nb_float which would recurse + value = basestring_to_float(space, w_obj) + return space.newfloat(value) @cpython_api([CONST_STRING, rffi.INT_real], rffi.DOUBLE, error=-1.0) def _PyFloat_Unpack4(space, ptr, le): diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -135,6 +135,28 @@ return space.w_NotImplemented return func_with_new_name(_compare, 'descr_' + opname) +def basestring_to_float(space, w_value): + def _string_to_float(space, w_source, string): + try: + return rfloat.string_to_float(string) + except ParseStringError as e: + raise wrap_parsestringerror(space, e, w_source) + + if space.isinstance_w(w_value, space.w_unicode): + from unicodeobject import unicode_to_decimal_w + value = _string_to_float(space, w_value, + unicode_to_decimal_w(space, w_value)) + else: + try: + value = space.charbuf_w(w_value) + except OperationError as e: + if e.match(space, space.w_TypeError): + raise oefmt( + space.w_TypeError, + "float() argument must be a string or a number") + raise + value = _string_to_float(space, w_value, value) + return value class W_FloatObject(W_Root): """This is a implementation of the app-level 'float' type. @@ -193,32 +215,14 @@ @staticmethod @unwrap_spec(w_x=WrappedDefault(0.0)) def descr__new__(space, w_floattype, w_x): - def _string_to_float(space, w_source, string): - try: - return rfloat.string_to_float(string) - except ParseStringError as e: - raise wrap_parsestringerror(space, e, w_source) - w_value = w_x # 'x' is the keyword argument name in CPython if space.lookup(w_value, "__float__") is not None: w_obj = space.float(w_value) if space.is_w(w_floattype, space.w_float): return w_obj value = space.float_w(w_obj) - elif space.isinstance_w(w_value, space.w_unicode): - from unicodeobject import unicode_to_decimal_w - value = _string_to_float(space, w_value, - unicode_to_decimal_w(space, w_value)) else: - try: - value = space.charbuf_w(w_value) - except OperationError as e: - if e.match(space, space.w_TypeError): - raise oefmt( - space.w_TypeError, - "float() argument must be a string or a number") - raise - value = _string_to_float(space, w_value, value) + value = basestring_to_float(space, w_value) w_obj = space.allocate_instance(W_FloatObject, w_floattype) W_FloatObject.__init__(w_obj, value) return w_obj From pypy.commits at gmail.com Thu Nov 16 11:30:05 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:05 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: a branch where to try to fix issue 2683 in a different (and simpler) way than continulet-no-frame-loop Message-ID: <5a0dbd0d.3bb0df0a.d4ca0.62d7@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93056:5dfc7af8c0ff Date: 2017-11-16 16:22 +0100 http://bitbucket.org/pypy/pypy/changeset/5dfc7af8c0ff/ Log: a branch where to try to fix issue 2683 in a different (and simpler) way than continulet-no-frame-loop From pypy.commits at gmail.com 
Thu Nov 16 11:30:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:07 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: cherry pick two failing tests from the branch continulet-no-frame-loop Message-ID: <5a0dbd0f.3bb0df0a.d4ca0.62e0@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93057:37701890010b Date: 2017-11-16 16:25 +0100 http://bitbucket.org/pypy/pypy/changeset/37701890010b/ Log: cherry pick two failing tests from the branch continulet-no-frame- loop diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -365,27 +365,47 @@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] - def test_f_back(self): + def test_f_back_no_cycles(self): import sys from _continuation import continulet stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back'] + f = sys._getframe(0) + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + c.switch(f) + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + def foo(c): + bar(c) + # + c = continulet(foo) + assert stack() == ['test_f_back_no_cycles'] + f = c.switch() + assert stack() == ['test_f_back_no_cycles'] + assert stack(f) == ['bar', 'foo'] + c.switch() + + def test_f_back_complex(self): + import sys + from _continuation import continulet + stack = self.stack + # + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back_complex'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] c.switch(sys._getframe(1).f_back) # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # - assert stack() == ['test_f_back'] + assert stack() == ['test_f_back_complex'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -398,15 +418,16 @@ def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main2', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # main() main2() From pypy.commits at gmail.com Thu Nov 16 11:30:09 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:09 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: WIP: (antocuni, arigato): refactor things so that we no longer need a bottomframe, and that the bottom-most frame of each continulet is always None: this mimics more closely the stack of greenlets on CPython, and avoid building frame cycles. 
The corresponding test_f_back_* are failing right now because they are still checking the old behavior Message-ID: <5a0dbd11.d08edf0a.a9d08.857b@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93058:55154dad821a Date: 2017-11-16 17:28 +0100 http://bitbucket.org/pypy/pypy/changeset/55154dad821a/ Log: WIP: (antocuni, arigato): refactor things so that we no longer need a bottomframe, and that the bottom-most frame of each continulet is always None: this mimics more closely the stack of greenlets on CPython, and avoids building frame cycles. The corresponding test_f_back_* are failing right now because they are still checking the old behavior diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py ---
a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) From pypy.commits at gmail.com Thu Nov 16 13:30:50 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 10:30:50 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: fix and simplify test_f_back_*: now that we hide the frames below bottomframe, a part of the test does not longer makes sense since we don't have any frame to check :) Message-ID: <5a0dd95a.8cabdf0a.e68ae.4d86@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93060:e8f933d33b7e Date: 2017-11-16 18:52 +0100 http://bitbucket.org/pypy/pypy/changeset/e8f933d33b7e/ Log: fix and simplify test_f_back_*: now that we hide the frames below bottomframe, a part of the test does not longer makes sense since we don't have any frame to check :) diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -372,9 +372,9 @@ # def bar(c): f = sys._getframe(0) - assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + assert stack() == ['bar', 'foo'] c.switch(f) - assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + assert stack() == ['bar', 'foo'] def foo(c): bar(c) # @@ -391,17 +391,13 @@ stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back_complex'] + assert stack() == ['bar', 'foo'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] + assert stack() == ['bar', 'foo'] c.switch(sys._getframe(1).f_back) - # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] - assert sys._getframe(2) is f3_foo.f_back - c.switch(sys._getframe(2)) def foo(c): bar(c) # @@ -416,21 +412,13 @@ assert f1_bar.f_back is f3_foo # def main(): - f4_main = c.switch() - assert f4_main.f_code.co_name == 'main' + f4_None = c.switch() + assert f4_None is None assert f3_foo.f_back is None # not running assert stack() == ['main', 'test_f_back_complex'] assert stack(f1_bar) == ['bar', 'foo'] # - def main2(): - f5_main2 = c.switch() - assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is None # not running - assert stack() == ['main2', 'test_f_back_complex'] - assert stack(f1_bar) == ['bar', 'foo'] - # main() - main2() res = c.switch() assert res is None assert f3_foo.f_back is None From pypy.commits at gmail.com Thu Nov 16 13:30:52 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 10:30:52 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: fix permute, and rewrite the corresponding test since we can no longer check what is the 'back' frame Message-ID: <5a0dd95c.22a8df0a.e5ef.d86a@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93061:8c14e037eea6 Date: 2017-11-16 19:21 +0100 http://bitbucket.org/pypy/pypy/changeset/8c14e037eea6/ Log: fix permute, and rewrite the corresponding test since we can no longer check what is the 'back' frame diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- 
a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -280,8 +280,7 @@ # if len(contlist) > 1: otherh = contlist[-1].h - otherb = contlist[-1].bottomframe.f_backref + otherb = contlist[-1].backframeref for cont in contlist: otherh, cont.h = cont.h, otherh - b = cont.bottomframe - otherb, b.f_backref = b.f_backref, otherb + otherb, cont.backframeref = cont.backframeref, otherb diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -714,24 +714,31 @@ import sys from _continuation import continulet, permute # - def f1(c1): - res = c1.switch() - assert res == "ok" - return "done" + def a(c): + seen.append(2) + res = c.switch() + assert res == 'b' + seen.append(6) + return 'a' + def b(c): + seen.append(3) + c.switch() + seen.append(5) + return 'b' # - def f2(c2): - assert sys._getframe(1).f_code.co_name == 'main' - permute(c1, c2) - assert sys._getframe(1).f_code.co_name == 'f1' - return "ok" - # - c1 = continulet(f1) - c2 = continulet(f2) - def main(): - c1.switch() - res = c2.switch() - assert res == "done" - main() + seen = [] + c1 = continulet(a) + c2 = continulet(b) + seen.append(1) + c1.switch() + c2.switch() + seen.append(4) + permute(c1, c2) + res = c1.switch() + assert res == 'a' + assert not c2.is_pending() + seen.append(7) + assert seen == [1, 2, 3, 4, 5, 6, 7] def test_permute_noninitialized(self): from _continuation import continulet, permute From pypy.commits at gmail.com Thu Nov 16 14:12:31 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 16 Nov 2017 11:12:31 -0800 (PST) Subject: [pypy-commit] pypy refactor-PyFloat_FromString: document and close branch to merge Message-ID: <5a0de31f.55281c0a.7303d.572b@mx.google.com> Author: Matti Picus Branch: refactor-PyFloat_FromString Changeset: r93062:2bf28f126b37 Date: 2017-11-16 20:31 +0200 http://bitbucket.org/pypy/pypy/changeset/2bf28f126b37/ Log: document and close branch to merge diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,8 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: refactor-PyFloat_FromString +Refactor PyFloat_FromString so it can be used inside nb_float, together with +a pull request to NumPy makes string ndarray float(a) and a.__float__() follow the +same code path From pypy.commits at gmail.com Thu Nov 16 14:41:08 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 11:41:08 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Let SyntaxError tracebacks show the bad code line (hard to test) Message-ID: <5a0de9d4.01141c0a.f0840.945f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93063:12061ebde67d Date: 2017-11-16 19:40 +0000 http://bitbucket.org/pypy/pypy/changeset/12061ebde67d/ Log: Let SyntaxError tracebacks show the bad code line (hard to test) diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -14,7 +14,20 @@ def wrap_info(self, space): w_text = w_filename = space.w_None offset = self.offset - if self.text is not None: + w_lineno = space.newint(self.lineno) + if self.filename is not None: + w_filename = space.newfilename(self.filename) + if self.text is None and self.filename is not None: + w_text = space.appexec([w_filename, w_lineno], + """(filename, lineno): + try: + with open(filename) as f: + for _ in range(lineno): + f.read() + return f.read() + except: # we can't allow any exceptions here! + return None""") + elif self.text is not None: from rpython.rlib.runicode import str_decode_utf_8 # self.text may not be UTF-8 in case of decoding errors. # adjust the encoded text offset to a decoded offset @@ -29,20 +42,15 @@ text, _ = str_decode_utf_8(self.text, len(self.text), 'replace') w_text = space.newunicode(text) - if self.filename is not None: - w_filename = space.newfilename(self.filename) - return space.newtuple([space.newtext(self.msg), - space.newtuple([w_filename, - space.newint(self.lineno), - space.newint(offset), - w_text, - space.newint(self.lastlineno)])]) + return space.newtuple([ + space.newtext(self.msg), + space.newtuple([ + w_filename, w_lineno, space.newint(offset), + w_text, space.newint(self.lastlineno)])]) def __str__(self): - return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, - self.lineno, - self.offset, - self.text) + return "%s at pos (%d, %d) in %r" % ( + self.__class__.__name__, self.lineno, self.offset, self.text) class IndentationError(SyntaxError): pass @@ -51,10 +59,11 @@ def __init__(self, lineno=0, offset=0, text=None, filename=None, lastlineno=0): msg = "inconsistent use of tabs and spaces in indentation" - IndentationError.__init__(self, msg, lineno, offset, text, filename, lastlineno) + IndentationError.__init__( + self, msg, lineno, offset, text, filename, lastlineno) class ASTError(Exception): - def __init__(self, msg, ast_node ): + def __init__(self, msg, ast_node): self.msg = msg self.ast_node = ast_node diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -733,10 +733,14 @@ self.w_msg = args_w[0] if len(args_w) == 2: values_w = space.fixedview(args_w[1]) - if len(values_w) > 0: self.w_filename = values_w[0] - if len(values_w) > 1: self.w_lineno = values_w[1] - if len(values_w) > 2: self.w_offset = values_w[2] - if len(values_w) > 3: self.w_text = values_w[3] + if len(values_w) > 0: + self.w_filename = values_w[0] + if len(values_w) > 1: + self.w_lineno = values_w[1] + if len(values_w) > 2: + 
self.w_offset = values_w[2] + if len(values_w) > 3: + self.w_text = values_w[3] if len(values_w) > 4: self.w_lastlineno = values_w[4] # PyPy extension # kill the extra items from args_w to prevent undesired effects From pypy.commits at gmail.com Thu Nov 16 15:21:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 12:21:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip CPython-specific test Message-ID: <5a0df32f.d7941c0a.eed22.57d7@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93064:44b2fd82cbfa Date: 2017-11-16 20:20 +0000 http://bitbucket.org/pypy/pypy/changeset/44b2fd82cbfa/ Log: Skip CPython-specific test diff --git a/lib-python/3/test/test_cprofile.py b/lib-python/3/test/test_cprofile.py --- a/lib-python/3/test/test_cprofile.py +++ b/lib-python/3/test/test_cprofile.py @@ -1,7 +1,7 @@ """Test suite for the cProfile module.""" import sys -from test.support import run_unittest, TESTFN, unlink +from test.support import run_unittest, TESTFN, unlink, cpython_only # rip off all interesting stuff from test_profile import cProfile @@ -16,6 +16,7 @@ return _ProfileOutput # Issue 3895. + @cpython_only def test_bad_counter_during_dealloc(self): import _lsprof # Must use a file as StringIO doesn't trigger the bug. From pypy.commits at gmail.com Thu Nov 16 20:52:10 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 17:52:10 -0800 (PST) Subject: [pypy-commit] pypy py3.5: (pjenvey) fix SyntaxError.wrap_info() Message-ID: <5a0e40ca.078bdf0a.27561.4946@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93065:cce3bc563868 Date: 2017-11-17 01:51 +0000 http://bitbucket.org/pypy/pypy/changeset/cce3bc563868/ Log: (pjenvey) fix SyntaxError.wrap_info() diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -23,8 +23,8 @@ try: with open(filename) as f: for _ in range(lineno): - f.read() - return f.read() + f.readline() + return f.readline() except: # we can't allow any exceptions here! 
return None""") elif self.text is not None: From pypy.commits at gmail.com Thu Nov 16 22:28:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 19:28:55 -0800 (PST) Subject: [pypy-commit] pypy default: Convert ListItem.read_locations from dict to set Message-ID: <5a0e5777.c4d51c0a.55a59.9091@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93066:e2f076c1fae4 Date: 2017-11-17 03:09 +0000 http://bitbucket.org/pypy/pypy/changeset/e2f076c1fae4/ Log: Convert ListItem.read_locations from dict to set diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -89,11 +89,11 @@ self.force_non_null = force_non_null def read_key(self, position_key): - self.dictkey.read_locations[position_key] = True + self.dictkey.read_locations.add(position_key) return self.dictkey.s_value def read_value(self, position_key): - self.dictvalue.read_locations[position_key] = True + self.dictvalue.read_locations.add(position_key) return self.dictvalue.s_value def same_as(self, other): diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py --- a/rpython/annotator/listdef.py +++ b/rpython/annotator/listdef.py @@ -30,7 +30,7 @@ self.s_value = s_value self.bookkeeper = bookkeeper self.itemof = {} # set of all ListDefs using this ListItem - self.read_locations = {} + self.read_locations = set() if bookkeeper is None: self.dont_change_any_more = True @@ -95,7 +95,7 @@ self.notify_update() if s_new_value != s_other_value: other.notify_update() - self.read_locations.update(other.read_locations) + self.read_locations |= other.read_locations def patch(self): for listdef in self.itemof: @@ -130,7 +130,7 @@ self.listitem.itemof[self] = True def read_item(self, position_key): - self.listitem.read_locations[position_key] = True + self.listitem.read_locations.add(position_key) return self.listitem.s_value def same_as(self, other): From pypy.commits at gmail.com Fri Nov 17 03:18:45 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 17 Nov 2017 00:18:45 -0800 (PST) Subject: [pypy-commit] pypy default: graft part of edb8f85891e5 that un-breaks own tests on win32 Message-ID: <5a0e9b65.929bdf0a.ea12b.f13b@mx.google.com> Author: Matti Picus Branch: Changeset: r93067:a8d2e8dc97fa Date: 2017-11-17 10:17 +0200 http://bitbucket.org/pypy/pypy/changeset/a8d2e8dc97fa/ Log: graft part of edb8f85891e5 that un-breaks own tests on win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform.startswith('linux'): + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): From pypy.commits at gmail.com Fri Nov 17 08:05:13 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 17 Nov 2017 05:05:13 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: get back to the point of passing objspace tests with utf8 turnaround removed from codecs Message-ID: <5a0ede89.5d87df0a.a0b86.fe98@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93068:1e43261d5fd9 Date: 2017-11-17 14:04 +0100 
http://bitbucket.org/pypy/pypy/changeset/1e43261d5fd9/ Log: get back to the point of passing objspace tests with utf8 turnaround removed from codecs diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -68,6 +68,308 @@ flag = rutf8.FLAG_REGULAR return flag +# These functions take and return unwrapped rpython strings +def decode_unicode_escape(space, string): + state = space.fromcache(interp_codecs.CodecState) + unicodedata_handler = state.get_unicodedata_handler(space) + result_utf8, consumed, length, flag = str_decode_unicode_escape( + string, "strict", + final=True, + errorhandler=decode_error_handler(space), + ud_handler=unicodedata_handler) + return result_utf8, length, flag + +def decode_raw_unicode_escape(space, string): + result_utf8, consumed, lgt, flag = str_decode_raw_unicode_escape( + string, "strict", + final=True, errorhandler=decode_error_handler(space)) + return result_utf8, lgt, flag + +def check_ascii_or_raise(space, string): + try: + rutf8.check_ascii(string) + except rutf8.CheckError as e: + decode_error_handler(space)('strict', 'ascii', + 'ordinal not in range(128)', string, + e.pos, e.pos + 1) + assert False, "unreachable" + +def check_utf8_or_raise(space, string): + # Surrogates are accepted and not treated specially at all. + # If there happen to be two 3-bytes encoding a pair of surrogates, + # you still get two surrogate unicode characters in the result. + # These are the Python2 rules; Python3 differs. + try: + length, flag = rutf8.check_utf8(string, allow_surrogates=True) + except rutf8.CheckError as e: + # convert position into unicode position + lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) + decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, + lgt, lgt + 1) + assert False, "unreachable" + return length, flag + +def decode_utf8(space, s): + # DEPRECATED + return (s, check_utf8_or_raise(space, s)) + +def str_decode_ascii(s, errors, final, errorhandler): + try: + rutf8.check_ascii(s) + return s, len(s), len(s), rutf8.FLAG_ASCII + except rutf8.CheckError: + return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + +def _str_decode_ascii_slowpath(s, errors, final, errorhandler): + i = 0 + res = StringBuilder() + while i < len(s): + ch = s[i] + if ord(ch) > 0x7F: + r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', + s, i, i + 1) + res.append(r) + else: + res.append(ch) + i += 1 + ress = res.build() + lgt, flag = rutf8.check_utf8(ress, True) + return ress, len(s), lgt, flag + +def str_decode_latin_1(s, errors, final, errorhandler): + xxx + +def utf8_encode_latin_1(s, errors, errorhandler): + try: + rutf8.check_ascii(s) + return s + except rutf8.CheckError: + return _utf8_encode_latin_1_slowpath(s, errors, errorhandler) + +def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): + res = StringBuilder(len(s)) + size = len(s) + cur = 0 + i = 0 + while i < size: + if ord(s[i]) <= 0x7F: + res.append(s[i]) + else: + oc = rutf8.codepoint_at_pos(s, i) + if oc <= 0xFF: + res.append(chr(oc)) + i += 1 + else: + r, pos = errorhandler(errors, 'latin1', + 'ordinal not in range(256)', s, cur, + cur + 1) + res.append(r) + for j in range(pos - cur): + i = rutf8.next_codepoint_pos(s, i) + cur = pos + cur += 1 + i += 1 + r = res.build() + return r + +class DecodeWrapper(object): + def __init__(self, handler): + self.orig = handler + + def handle(self, errors, encoding, msg, s, pos, endpos): + return self.orig(errors, 
encoding, msg, s, pos, endpos) + +class EncodeWrapper(object): + def __init__(self, handler): + self.orig = handler + + def handle(self, errors, encoding, msg, s, pos, endpos): + return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) + +#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): +# w = DecodeWrapper(errorhandler) +# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, +# w.handle, +# ud_handler) +# return u.encode('utf8'), pos, len(u), _get_flag(u) + +def setup_new_encoders_legacy(encoding): + encoder_name = 'utf8_encode_' + encoding + encoder_call_name = 'unicode_encode_' + encoding + decoder_name = 'str_decode_' + encoding + def encoder(utf8, errors, errorhandler): + u = utf8.decode("utf8") + w = EncodeWrapper(errorhandler) + return getattr(runicode, encoder_call_name)(u, len(u), errors, + w.handle) + def decoder(s, slen, errors, final, errorhandler): + w = DecodeWrapper((errorhandler)) + u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) + return u.encode('utf8'), pos, len(u), _get_flag(u) + encoder.__name__ = encoder_name + decoder.__name__ = decoder_name + if encoder_name not in globals(): + globals()[encoder_name] = encoder + if decoder_name not in globals(): + globals()[decoder_name] = decoder + +def setup(): + for encoding in ['utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', + 'utf_32_be', 'unicode_internal']: + setup_new_encoders_legacy(encoding) + +setup() + +def utf8_encode_ascii(utf8, errors, errorhandler): + """ Don't be confused - this is a slowpath for errors e.g. "ignore" + or an obscure errorhandler + """ + res = StringBuilder() + i = 0 + pos = 0 + while i < len(utf8): + ch = rutf8.codepoint_at_pos(utf8, i) + if ch >= 0x7F: + msg = "ordinal not in range(128)" + r, newpos = errorhandler(errors, 'ascii', msg, utf8, + pos, pos + 1) + for _ in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + pos = newpos + res.append(r) + else: + res.append(chr(ch)) + i = rutf8.next_codepoint_pos(utf8, i) + pos += 1 + + s = res.build() + return s + +def str_decode_utf8(s, errors, final, errorhandler): + """ Same as checking for the valid utf8, but we know the utf8 is not + valid so we're trying to either raise or pack stuff with error handler. 
+ The key difference is that this is call_may_force + """ + slen = len(s) + res = StringBuilder(slen) + pos = 0 + continuation_bytes = 0 + end = len(s) + while pos < end: + ordch1 = ord(s[pos]) + # fast path for ASCII + if ordch1 <= 0x7F: + pos += 1 + res.append(chr(ordch1)) + continue + + if ordch1 <= 0xC1: + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos, pos + 1) + res.append(r) + continue + + pos += 1 + + if ordch1 <= 0xDF: + if pos >= end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + + if rutf8._invalid_byte_2_of_2(ordch2): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + pos += 1 + continuation_bytes += 1 + res.append(chr(ordch1)) + res.append(chr(ordch2)) + continue + + if ordch1 <= 0xEF: + if (pos + 2) > end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos + 1) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + + if rutf8._invalid_byte_2_of_3(ordch1, ordch2, True): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_3(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + pos += 2 + + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + continuation_bytes += 2 + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + continue + + if ordch1 <= 0xF4: + if (pos + 3) > end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + ordch4 = ord(s[pos + 2]) + + if rutf8._invalid_byte_2_of_4(ordch1, ordch2): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_4(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + elif rutf8._invalid_byte_4_of_4(ordch4): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 2) + res.append(r) + continue + + pos += 3 + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + res.append(chr(ordch4)) + continuation_bytes += 3 + continue + + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos - 1, pos) + res.append(r) + + assert pos == end + assert pos - continuation_bytes >= 0 + r = res.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos - continuation_bytes, lgt, flag + def hexescape(builder, s, pos, digits, encoding, errorhandler, message, errors): chr = 0 @@ -273,178 +575,57 @@ return builder.build(), pos, outsize, flag -# These functions take and return unwrapped rpython strings and unicodes -def decode_unicode_escape(space, string): - state = space.fromcache(interp_codecs.CodecState) - unicodedata_handler = state.get_unicodedata_handler(space) - result_utf8, consumed, length, flag = str_decode_unicode_escape( - string, "strict", - final=True, - errorhandler=decode_error_handler(space), - ud_handler=unicodedata_handler) - return result_utf8, length, flag +# 
____________________________________________________________ +# Raw unicode escape -def decode_raw_unicode_escape(space, string): - # XXX pick better length, maybe - # XXX that guy does not belong in runicode (nor in rutf8) - result_u, consumed = runicode.str_decode_raw_unicode_escape( - string, len(string), "strict", - final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle) - # XXX argh. we want each surrogate to be encoded separately - utf8 = ''.join([u.encode('utf8') for u in result_u]) - if rutf8.first_non_ascii_char(utf8) == -1: - flag = rutf8.FLAG_ASCII - elif _has_surrogate(result_u): - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR - return utf8, len(result_u), flag +def str_decode_raw_unicode_escape(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII -def check_ascii_or_raise(space, string): - try: - rutf8.check_ascii(string) - except rutf8.CheckError as e: - decode_error_handler(space)('strict', 'ascii', - 'ordinal not in range(128)', string, - e.pos, e.pos + 1) - assert False, "unreachable" + result = StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] -def check_utf8_or_raise(space, string): - # Surrogates are accepted and not treated specially at all. - # If there happen to be two 3-bytes encoding a pair of surrogates, - # you still get two surrogate unicode characters in the result. - # These are the Python2 rules; Python3 differs. - try: - length, flag = rutf8.check_utf8(string, allow_surrogates=True) - except rutf8.CheckError as e: - # convert position into unicode position - lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) - decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, - lgt, lgt + 1) - assert False, "unreachable" - return length, flag + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + rutf8.unichr_as_utf8_append(result, ord(ch), True) + pos += 1 + continue -def encode_utf8(space, uni): - # DEPRECATED - # Note that this function never raises UnicodeEncodeError, - # since surrogates are allowed, either paired or lone. - # A paired surrogate is considered like the non-BMP character - # it stands for. These are the Python2 rules; Python3 differs. 
- return runicode.unicode_encode_utf_8( - uni, len(uni), "strict", - errorhandler=None, - allow_surrogates=True) + # \u-escapes are only interpreted iff the number of leading + # backslashes is odd + bs = pos + while pos < size: + pos += 1 + if pos == size or s[pos] != '\\': + break + result.append('\\') -def decode_utf8(space, s): - # DEPRECATED - return (s, check_utf8_or_raise(space, s)) + # we have a backslash at the end of the string, stop here + if pos >= size: + result.append('\\') + break -def str_decode_ascii(s, errors, final, errorhandler): - try: - rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII - except rutf8.CheckError: - return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + if ((pos - bs) & 1 == 0 or + pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + result.append('\\') + rutf8.unichr_as_utf8_append(result, ord(s[pos]), True) + pos += 1 + continue -def _str_decode_ascii_slowpath(s, errors, final, errorhandler): - i = 0 - res = StringBuilder() - while i < len(s): - ch = s[i] - if ord(ch) > 0x7F: - r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', - s, i, i + 1) - res.append(r) - else: - res.append(ch) - i += 1 - ress = res.build() - lgt, flag = rutf8.check_utf8(ress, True) - return ress, len(s), lgt, flag + digits = 4 if s[pos] == 'u' else 8 + message = "truncated \\uXXXX" + pos += 1 + pos = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) -# XXX wrappers, think about speed - -class DecodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s, pos, endpos) - -class EncodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) - -#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): -# w = DecodeWrapper(errorhandler) -# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, -# w.handle, -# ud_handler) -# return u.encode('utf8'), pos, len(u), _get_flag(u) - -def setup_new_encoders_legacy(encoding): - encoder_name = 'utf8_encode_' + encoding - encoder_call_name = 'unicode_encode_' + encoding - decoder_name = 'str_decode_' + encoding - def encoder(utf8, utf8len, errors, errorhandler): - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return getattr(runicode, encoder_call_name)(u, len(u), errors, - w.handle) - def decoder(s, slen, errors, final, errorhandler): - w = DecodeWrapper((errorhandler)) - u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) - encoder.__name__ = encoder_name - decoder.__name__ = decoder_name - if encoder_name not in globals(): - globals()[encoder_name] = encoder - if decoder_name not in globals(): - globals()[decoder_name] = decoder - -def setup(): - for encoding in ['raw_unicode_escape', - 'utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', - 'utf_32_be', 'latin_1', 'unicode_internal']: - setup_new_encoders_legacy(encoding) - -setup() - -def utf8_encode_ascii(utf8, errors, errorhandler): - """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" - or an obscure errorhandler - """ - res = StringBuilder() - i = 0 - pos = 0 - while i < len(utf8): - ch = rutf8.codepoint_at_pos(utf8, i) - if ch >= 0x7F: - msg = "ordinal not in range(128)" - r, newpos = errorhandler(errors, 'ascii', msg, utf8, - pos, pos + 1) - for _ in range(newpos - pos): - i = rutf8.next_codepoint_pos(utf8, i) - pos = newpos - res.append(r) - else: - res.append(chr(ch)) - i = rutf8.next_codepoint_pos(utf8, i) - pos += 1 - - s = res.build() - return s - -# some irregular interfaces -def str_decode_utf8(s, slen, errors, final, errorhandler): - xxxx - - u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, - runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u), _get_flag(u) + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag # ____________________________________________________________ # utf-7 @@ -660,7 +841,6 @@ base64bits >= 6 or (base64bits > 0 and base64buffer != 0)): msg = "unterminated shift sequence" - xxxx res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) reslen, resflags = rutf8.check_utf8(res, True) outsize += reslen diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -473,7 +473,7 @@ lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, - len(string), errors, final, state.decode_error_handler) + errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -199,7 +199,6 @@ return raise CheckError(res) - @jit.elidable def first_non_ascii_char(s): for i in range(len(s)): From pypy.commits at gmail.com Fri Nov 17 12:06:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 09:06:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Line numbers start from 1 Message-ID: <5a0f1703.86081c0a.2e97f.1042@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93071:43c4fa3bea61 Date: 2017-11-17 17:05 +0000 http://bitbucket.org/pypy/pypy/changeset/43c4fa3bea61/ Log: Line numbers start from 1 diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -22,7 +22,7 @@ """(filename, lineno): try: with open(filename) as f: - for _ in range(lineno): + for _ in range(lineno - 1): f.readline() return f.readline() except: # we can't allow any exceptions here! 
From pypy.commits at gmail.com Fri Nov 17 12:33:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 09:33:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix did test to match PyPy bytecode Message-ID: <5a0f1d59.4a981c0a.528ef.0735@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93072:4f9bca50104e Date: 2017-11-17 17:32 +0000 http://bitbucket.org/pypy/pypy/changeset/4f9bca50104e/ Log: Fix did test to match PyPy bytecode diff --git a/lib-python/3/test/test_dis.py b/lib-python/3/test/test_dis.py --- a/lib-python/3/test/test_dis.py +++ b/lib-python/3/test/test_dis.py @@ -147,23 +147,24 @@ pass dis_bug1333982 = """\ -%3d 0 LOAD_CONST 1 (0) - 3 POP_JUMP_IF_TRUE 35 - 6 LOAD_GLOBAL 0 (AssertionError) - 9 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) - 12 LOAD_CONST 3 ('bug1333982..') - 15 MAKE_FUNCTION 0 - 18 LOAD_FAST 0 (x) - 21 GET_ITER - 22 CALL_FUNCTION 1 (1 positional, 0 keyword pair) +%3d 0 JUMP_IF_NOT_DEBUG 35 (to 38) + 3 LOAD_CONST 1 (0) + 6 POP_JUMP_IF_TRUE 38 + 9 LOAD_GLOBAL 0 (AssertionError) + 12 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) + 15 LOAD_CONST 3 ('bug1333982..') + 18 MAKE_FUNCTION 0 + 21 LOAD_FAST 0 (x) + 24 GET_ITER + 25 CALL_FUNCTION 1 (1 positional, 0 keyword pair) -%3d 25 LOAD_CONST 4 (1) - 28 BINARY_ADD - 29 CALL_FUNCTION 1 (1 positional, 0 keyword pair) - 32 RAISE_VARARGS 1 +%3d 28 LOAD_CONST 4 (1) + 31 BINARY_ADD + 32 CALL_FUNCTION 1 (1 positional, 0 keyword pair) + 35 RAISE_VARARGS 1 -%3d >> 35 LOAD_CONST 0 (None) - 38 RETURN_VALUE +%3d >> 38 LOAD_CONST 0 (None) + 41 RETURN_VALUE """ % (bug1333982.__code__.co_firstlineno + 1, __file__, bug1333982.__code__.co_firstlineno + 1, From pypy.commits at gmail.com Fri Nov 17 14:24:41 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 11:24:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: add comment Message-ID: <5a0f3779.d2addf0a.128b0.3239@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93073:6790b83265fa Date: 2017-11-17 19:18 +0000 http://bitbucket.org/pypy/pypy/changeset/6790b83265fa/ Log: add comment diff --git a/lib-python/3/test/test_dis.py b/lib-python/3/test/test_dis.py --- a/lib-python/3/test/test_dis.py +++ b/lib-python/3/test/test_dis.py @@ -146,6 +146,7 @@ 1) pass +# PyPy change: JUMP_IF_NOT_DEBUG dis_bug1333982 = """\ %3d 0 JUMP_IF_NOT_DEBUG 35 (to 38) 3 LOAD_CONST 1 (0) From pypy.commits at gmail.com Fri Nov 17 14:24:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 11:24:43 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix doctest to work on builtin functions and methods Message-ID: <5a0f377b.8c6f1c0a.18891.7a0f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93074:ab4627e038e3 Date: 2017-11-17 19:22 +0000 http://bitbucket.org/pypy/pypy/changeset/ab4627e038e3/ Log: Fix doctest to work on builtin functions and methods diff --git a/lib-python/3/doctest.py b/lib-python/3/doctest.py --- a/lib-python/3/doctest.py +++ b/lib-python/3/doctest.py @@ -939,6 +939,8 @@ elif inspect.getmodule(object) is not None: return module is inspect.getmodule(object) elif inspect.isfunction(object): + if isinstance(object.__code__, inspect._builtin_code_type): + return True # XXX: A PyPy builtin - no way to tell return module.__dict__ is object.__globals__ elif inspect.ismethoddescriptor(object): if hasattr(object, '__objclass__'): diff --git a/lib-python/3/test/test_doctest.py b/lib-python/3/test/test_doctest.py --- a/lib-python/3/test/test_doctest.py +++ b/lib-python/3/test/test_doctest.py @@ -660,7 +660,7 @@ >>> import builtins >>> 
tests = doctest.DocTestFinder().find(builtins) - >>> lo, hi = (120, 140) if is_pypy else (790, 810) + >>> lo, hi = (420, 440) if is_pypy else (790, 810) >>> lo < len(tests) < hi # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] From pypy.commits at gmail.com Fri Nov 17 17:54:03 2017 From: pypy.commits at gmail.com (gabr...@ec2-54-146-239-158.compute-1.amazonaws.com) Date: Fri, 17 Nov 2017 14:54:03 -0800 (PST) Subject: [pypy-commit] pypy default: Declare _PyLong_FromByteArray space parameter as const. Message-ID: <5a0f688b.21b9df0a.57c2e.4f99@mx.google.com> Author: gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com Branch: Changeset: r93075:d929dd0ac8bc Date: 2017-11-17 13:04 -0300 http://bitbucket.org/pypy/pypy/changeset/d929dd0ac8bc/ Log: Declare _PyLong_FromByteArray space parameter as const. diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -224,8 +224,9 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -UCHARP = rffi.CArrayPtr(rffi.UCHAR) - at cpython_api([UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) +CONST_UCHARP = lltype.Ptr(lltype.Array(lltype.UChar, hints={'nolength': True, + 'render_as_const': True})) + at cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): little_endian = rffi.cast(lltype.Signed, little_endian) signed = rffi.cast(lltype.Signed, signed) From pypy.commits at gmail.com Sat Nov 18 04:57:13 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 18 Nov 2017 01:57:13 -0800 (PST) Subject: [pypy-commit] pypy default: Fix d929dd0ac8bc Message-ID: <5a1003f9.cc9cdf0a.db917.040f@mx.google.com> Author: Armin Rigo Branch: Changeset: r93076:4791c8513684 Date: 2017-11-18 10:56 +0100 http://bitbucket.org/pypy/pypy/changeset/4791c8513684/ Log: Fix d929dd0ac8bc diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -224,7 +224,7 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -CONST_UCHARP = lltype.Ptr(lltype.Array(lltype.UChar, hints={'nolength': True, +CONST_UCHARP = lltype.Ptr(lltype.Array(rffi.UCHAR, hints={'nolength': True, 'render_as_const': True})) @cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): From pypy.commits at gmail.com Sat Nov 18 23:33:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:17 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: fix some translation issues Message-ID: <5a11098d.c9b81c0a.6f4b2.ed51@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93077:1497f86a109d Date: 2016-11-23 07:27 +0000 http://bitbucket.org/pypy/pypy/changeset/1497f86a109d/ Log: fix some translation issues diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -384,9 +384,9 @@ w_ob = w_ob.convert_to_object() # if space.isinstance_w(w_ob, space.w_str): - value = self.cast_str(w_ob) + value = float(self.cast_str(w_ob)) elif space.isinstance_w(w_ob, space.w_unicode): - value = self.cast_unicode(w_ob) + value = float(self.cast_unicode(w_ob)) else: value = space.float_w(w_ob) w_cdata = 
cdataobj.W_CDataMem(space, self) diff --git a/pypy/module/math/interp_math.py b/pypy/module/math/interp_math.py --- a/pypy/module/math/interp_math.py +++ b/pypy/module/math/interp_math.py @@ -341,7 +341,7 @@ if partials: hi = partials[-1] j = 0 - lo = 0 + lo = 0.0 for j in range(len(partials) - 2, -1, -1): v = hi y = partials[j] diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -454,8 +454,8 @@ return Float64(self.space).box(self.unbox(v)) # numpy 1.10 compatibility raise oefmt(self.space.w_TypeError, "ufunc casting failure") - - + + class Integer(Primitive): _mixin_ = True @@ -1058,9 +1058,9 @@ def logaddexp2(self, v1, v2): tmp = v1 - v2 if tmp > 0: - return v1 + self.npy_log2_1p(math.pow(2, -tmp)) + return v1 + self.npy_log2_1p(math.pow(2., -tmp)) if tmp <= 0: - return v2 + self.npy_log2_1p(math.pow(2, tmp)) + return v2 + self.npy_log2_1p(math.pow(2., tmp)) else: return v1 + v2 @@ -1179,11 +1179,11 @@ imag_str += 'j' # (0+2j) => 2j - if real == 0 and math.copysign(1, real) == 1: + if real == 0. and math.copysign(1., real) == 1.: return imag_str real_str = str_format(real) - op = '+' if imag >= 0 or rfloat.isnan(imag) else '' + op = '+' if imag >= 0. or rfloat.isnan(imag) else '' return ''.join(['(', real_str, op, imag_str, ')']) def runpack_str(self, space, s, native): @@ -1501,13 +1501,13 @@ return rfloat.NAN, 0 if v[0] == 0.0: if v[1] == 0: - return 0, 0 + return 0., 0 if v[1] > 0: - return 1, 0 - return -1, 0 + return 1., 0 + return -1., 0 if v[0] > 0: - return 1, 0 - return -1, 0 + return 1., 0 + return -1., 0 def fmax(self, v1, v2): if self.ge(v1, v2) or self.isnan(v2): diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py --- a/pypy/objspace/std/complexobject.py +++ b/pypy/objspace/std/complexobject.py @@ -220,7 +220,7 @@ div = math.floor(w_div.realval) w_mod = self.sub( W_ComplexObject(other.realval * div, other.imagval * div)) - return (W_ComplexObject(div, 0), w_mod) + return (W_ComplexObject(div, 0.), w_mod) def pow(self, other): rr, ir = rcomplex.c_pow(self.as_tuple(), other.as_tuple()) diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -336,7 +336,7 @@ raise oefmt(space.w_OverflowError, "too large") else: lsb = max(top_exp, rfloat.DBL_MIN_EXP) - rfloat.DBL_MANT_DIG - value = 0 + value = 0. if exp >= lsb: for j in range(total_digits - 1, -1, -1): value = 16.0 * value + _hex_digit(s, j, co_end, diff --git a/rpython/rlib/rcomplex.py b/rpython/rlib/rcomplex.py --- a/rpython/rlib/rcomplex.py +++ b/rpython/rlib/rcomplex.py @@ -70,11 +70,11 @@ def c_pow(x, y): (r1, i1), (r2, i2) = x, y - if i1 == 0 and i2 == 0 and r1 > 0: + if i1 == 0. and i2 == 0. and r1 > 0.: rr = math.pow(r1, r2) ir = 0. elif r2 == 0.0 and i2 == 0.0: - rr, ir = 1, 0 + rr, ir = 1., 0. elif r1 == 1.0 and i1 == 0.0: rr, ir = (1.0, 0.0) elif r1 == 0.0 and i1 == 0.0: @@ -108,22 +108,22 @@ Method: use symmetries to reduce to the case when x = z.real and y = z.imag are nonnegative. Then the real part of the result is given by - + s = sqrt((x + hypot(x, y))/2) - + and the imaginary part is - + d = (y/2)/s - + If either x or y is very large then there's a risk of overflow in computation of the expression x + hypot(x, y). 
We can avoid this by rewriting the formula for s as: - + s = 2*sqrt(x/8 + hypot(x/8, y/8)) - + This costs us two extra multiplications/divisions, but avoids the overhead of checking for x and y large. - + If both x and y are subnormal then hypot(x, y) may also be subnormal, so will lack full precision. We solve this by rescaling x and y by a sufficiently large power of 2 to ensure that x and y From pypy.commits at gmail.com Sat Nov 18 23:33:20 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:20 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: hg merge default Message-ID: <5a110990.83b91c0a.1d24.4768@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93078:93d764ccc576 Date: 2016-11-24 02:11 +0000 http://bitbucket.org/pypy/pypy/changeset/93d764ccc576/ Log: hg merge default diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -111,7 +111,9 @@ self.keywords = self.keywords + keywords self.keywords_w = self.keywords_w + values_w return + is_dict = False if space.isinstance_w(w_starstararg, space.w_dict): + is_dict = True keys_w = space.unpackiterable(w_starstararg) else: try: @@ -125,7 +127,9 @@ keys_w = space.unpackiterable(w_keys) keywords_w = [None] * len(keys_w) keywords = [None] * len(keys_w) - _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, keywords_w, self.keywords) + _do_combine_starstarargs_wrapped( + space, keys_w, w_starstararg, keywords, keywords_w, self.keywords, + is_dict) self.keyword_names_w = keys_w if self.keywords is None: self.keywords = keywords @@ -355,7 +359,7 @@ key) def _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, - keywords_w, existingkeywords): + keywords_w, existingkeywords, is_dict): i = 0 for w_key in keys_w: try: @@ -374,7 +378,16 @@ "got multiple values for keyword argument '%s'", key) keywords[i] = key - keywords_w[i] = space.getitem(w_starstararg, w_key) + if is_dict: + # issue 2435: bug-to-bug compatibility with cpython. for a subclass of + # dict, just ignore the __getitem__ and access the underlying dict + # directly + from pypy.objspace.descroperation import dict_getitem + w_descr = dict_getitem(space) + w_value = space.get_and_call_function(w_descr, w_starstararg, w_key) + else: + w_value = space.getitem(w_starstararg, w_key) + keywords_w[i] = w_value i += 1 @jit.look_inside_iff( diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py --- a/pypy/interpreter/test/test_argument.py +++ b/pypy/interpreter/test/test_argument.py @@ -120,6 +120,12 @@ raise OperationError(AttributeError, name) return method(*args) + def lookup_in_type_where(self, cls, name): + return 'hopefully not needed', getattr(cls, name) + + def get_and_call_function(self, w_descr, w_obj, *args): + return w_descr.__get__(w_obj)(*args) + def type(self, obj): class Type: def getname(self, space): @@ -805,3 +811,19 @@ assert str(e) == "myerror" else: assert False, "Expected TypeError" + + def test_dict_subclass_with_weird_getitem(self): + # issue 2435: bug-to-bug compatibility with cpython. 
for a subclass of + # dict, just ignore the __getitem__ and behave like ext_do_call in ceval.c + # which just uses the underlying dict + class d(dict): + def __getitem__(self, key): + return key + + for key in ["foo", u"foo"]: + q = d() + q[key] = "bar" + + def test(**kwargs): + return kwargs + assert test(**q) == {"foo": "bar"} diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py --- a/pypy/module/cpyext/dictobject.py +++ b/pypy/module/cpyext/dictobject.py @@ -137,8 +137,7 @@ """This is the same as PyDict_Merge(a, b, 1) in C, or a.update(b) in Python. Return 0 on success or -1 if an exception was raised. """ - space.call_method(space.w_dict, "update", w_obj, w_other) - return 0 + return PyDict_Merge(space, w_obj, w_other, 1) @cpython_api([PyObject], PyObject) def PyDict_Keys(space, w_obj): diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -252,7 +252,10 @@ def PyObject_Format(space, w_obj, w_format_spec): if w_format_spec is None: w_format_spec = space.wrap('') - return space.call_method(w_obj, '__format__', w_format_spec) + w_ret = space.call_method(w_obj, '__format__', w_format_spec) + if space.isinstance_w(w_format_spec, space.w_unicode): + return space.unicode_from_object(w_ret) + return w_ret @cpython_api([PyObject], PyObject) def PyObject_Unicode(space, w_obj): diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -103,6 +103,17 @@ api.PyDict_Update(w_d, w_d2) assert space.unwrap(w_d) == dict(a='b', c='d', e='f') + def test_update_doesnt_accept_list_of_tuples(self, space, api): + w_d = space.newdict() + space.setitem(w_d, space.wrap("a"), space.wrap("b")) + + w_d2 = space.wrap([("c", "d"), ("e", "f")]) + + api.PyDict_Update(w_d, w_d2) + assert api.PyErr_Occurred() is space.w_AttributeError + api.PyErr_Clear() + assert space.unwrap(w_d) == dict(a='b') # unchanged + def test_iter(self, space, api): w_dict = space.sys.getdict(space) py_dict = make_ref(space, w_dict) @@ -199,3 +210,18 @@ """), ]) assert module.dict_proxy({'a': 1, 'b': 2}) == 2 + + def test_update(self): + module = self.import_extension('foo', [ + ("update", "METH_VARARGS", + ''' + if (PyDict_Update(PyTuple_GetItem(args, 0), PyTuple_GetItem(args, 1))) + return NULL; + Py_RETURN_NONE; + ''')]) + d = {"a": 1} + module.update(d, {"c": 2}) + assert d == dict(a=1, c=2) + d = {"a": 1} + raises(AttributeError, module.update, d, [("c", 2)]) + diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -312,6 +312,16 @@ assert isinstance(dict(), collections.Mapping) assert module.ismapping(dict()) + def test_format_returns_unicode(self): + module = self.import_extension('foo', [ + ("empty_format", "METH_O", + """ + PyObject* empty_unicode = PyUnicode_FromStringAndSize("", 0); + PyObject* obj = PyObject_Format(args, empty_unicode); + return obj; + """)]) + a = module.empty_format('hello') + assert isinstance(a, unicode) class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase): """ diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -331,12 +331,34 @@ PyHeapTypeObject *heaptype = 
(PyHeapTypeObject *)args; Py_INCREF(heaptype->ht_name); return heaptype->ht_name; + '''), + ("setattr", "METH_O", ''' - ) + int ret; + PyObject* name = PyString_FromString("mymodule"); + PyObject *obj = PyType_Type.tp_alloc(&PyType_Type, 0); + PyHeapTypeObject *type = (PyHeapTypeObject*)obj; + if ((type->ht_type.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0) + { + PyErr_SetString(PyExc_ValueError, + "Py_TPFLAGS_HEAPTYPE not set"); + return NULL; + } + type->ht_type.tp_name = ((PyTypeObject*)args)->tp_name; + PyType_Ready(&type->ht_type); + ret = PyObject_SetAttrString((PyObject*)&type->ht_type, + "__module__", name); + Py_DECREF(name); + if (ret < 0) + return NULL; + return PyLong_FromLong(ret); + '''), ]) class C(object): pass assert module.name_by_heaptype(C) == "C" + assert module.setattr(C) == 0 + def test_type_dict(self): foo = self.import_module("foo") diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -469,7 +469,7 @@ W_TypeObject.__init__(self, space, name, bases_w or [space.w_object], dict_w, force_new_layout=new_layout) self.flag_cpytype = True - self.flag_heaptype = False + self.flag_heaptype = pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE # if a sequence or a mapping, then set the flag to force it if pto.c_tp_as_sequence and pto.c_tp_as_sequence.c_sq_item: self.flag_map_or_seq = 'S' @@ -852,14 +852,14 @@ w_obj = space.allocate_instance(W_PyCTypeObject, w_metatype) track_reference(space, py_obj, w_obj) # __init__ wraps all slotdefs functions from py_type via add_operators - w_obj.__init__(space, py_type) + w_obj.__init__(space, py_type) w_obj.ready() finish_type_2(space, py_type, w_obj) base = py_type.c_tp_base if base: # XXX refactor - parts of this are done in finish_type_2 -> inherit_slots - if not py_type.c_tp_as_number: + if not py_type.c_tp_as_number: py_type.c_tp_as_number = base.c_tp_as_number py_type.c_tp_flags |= base.c_tp_flags & Py_TPFLAGS_CHECKTYPES py_type.c_tp_flags |= base.c_tp_flags & Py_TPFLAGS_HAVE_INPLACEOPS diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py --- a/pypy/objspace/descroperation.py +++ b/pypy/objspace/descroperation.py @@ -61,16 +61,24 @@ @specialize.memo() def str_getitem(space): "Utility that returns the app-level descriptor str.__getitem__." - w_src, w_iter = space.lookup_in_type_where(space.w_str, - '__getitem__') - return w_iter + w_src, w_getitem = space.lookup_in_type_where(space.w_str, + '__getitem__') + return w_getitem @specialize.memo() def unicode_getitem(space): "Utility that returns the app-level descriptor unicode.__getitem__." - w_src, w_iter = space.lookup_in_type_where(space.w_unicode, - '__getitem__') - return w_iter + w_src, w_getitem = space.lookup_in_type_where(space.w_unicode, + '__getitem__') + return w_getitem + + at specialize.memo() +def dict_getitem(space): + "Utility that returns the app-level descriptor dict.__getitem__." + w_src, w_getitem = space.lookup_in_type_where(space.w_dict, + '__getitem__') + return w_getitem + def raiseattrerror(space, w_obj, name, w_descr=None): if w_descr is None: diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -22,7 +22,8 @@ """Block annotator for RPython. 
See description in doc/translation.txt.""" - def __init__(self, translator=None, policy=None, bookkeeper=None): + def __init__(self, translator=None, policy=None, bookkeeper=None, + keepgoing=False): import rpython.rtyper.extfuncregistry # has side effects if translator is None: @@ -50,6 +51,9 @@ if bookkeeper is None: bookkeeper = Bookkeeper(self) self.bookkeeper = bookkeeper + self.keepgoing = keepgoing + self.failed_blocks = set() + self.errors = [] # temporary feature flag, see config.translation.brokentypes # defaults to True in real translations self.allow_bad_unions = False @@ -206,6 +210,12 @@ else: newgraphs = self.translator.graphs #all of them got_blocked_blocks = False in self.annotated.values() + if self.failed_blocks: + text = ('Annotation failed, %s errors were recorded:' % + len(self.errors)) + text += '\n-----'.join(str(e) for e in self.errors) + raise annmodel.AnnotatorError(text) + if got_blocked_blocks: for graph in self.blocked_graphs.values(): self.blocked_graphs[graph] = True @@ -352,6 +362,8 @@ #print '* processblock', block, cells self.annotated[block] = graph + if block in self.failed_blocks: + return if block in self.blocked_blocks: del self.blocked_blocks[block] try: @@ -396,6 +408,10 @@ except annmodel.UnionError as e: # Add source code to the UnionError e.source = '\n'.join(source_lines(graph, block, None, long=True)) + if self.keepgoing: + self.errors.append(e) + self.failed_blocks.add(block) + return raise # if the merged cells changed, we must redo the analysis if unions != oldcells: @@ -486,6 +502,10 @@ except annmodel.AnnotatorError as e: # note that UnionError is a subclass e.source = gather_error(self, graph, block, i) + if self.keepgoing: + self.errors.append(e) + self.failed_blocks.add(block) + return raise else: diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -193,6 +193,10 @@ "When true, enable the use of tagged pointers. 
" "If false, use normal boxing", default=False), + BoolOption("keepgoing", + "Continue annotating when errors are encountered, and report " + "them all at the end of the annotation phase", + default=False, cmdline="--keepgoing"), BoolOption("lldebug", "If true, makes an lldebug build", default=False, cmdline="--lldebug"), diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -399,9 +399,9 @@ def optimize_INT_EQ(self, op): arg0 = self.get_box_replacement(op.getarg(0)) + b1 = self.getintbound(arg0) arg1 = self.get_box_replacement(op.getarg(1)) - b1 = self.getintbound(op.getarg(0)) - b2 = self.getintbound(op.getarg(1)) + b2 = self.getintbound(arg1) if b1.known_gt(b2): self.make_constant_int(op, 0) elif b1.known_lt(b2): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -323,6 +323,8 @@ def register_replacement_for(replaced_function, sandboxed_name=None): def wrap(func): from rpython.rtyper.extregistry import ExtRegistryEntry + # to support calling func directly + func._sandbox_external_name = sandboxed_name class ExtRegistry(ExtRegistryEntry): _about_ = replaced_function def compute_annotation(self): diff --git a/rpython/rlib/rfloat.py b/rpython/rlib/rfloat.py --- a/rpython/rlib/rfloat.py +++ b/rpython/rlib/rfloat.py @@ -1,6 +1,7 @@ """Float constants""" import math, struct +from math import isinf, isnan, copysign, acosh, asinh, atanh, log1p, expm1 from rpython.annotator.model import SomeString, SomeChar from rpython.rlib import objectmodel, unroll @@ -184,104 +185,6 @@ INFINITY = 1e200 * 1e200 NAN = abs(INFINITY / INFINITY) # bah, INF/INF gives us -NAN? -try: - # Try to get math functions added in 2.6. - from math import isinf, isnan, copysign, acosh, asinh, atanh, log1p -except ImportError: - @not_rpython - def isinf(x): - return x == INFINITY or x == -INFINITY - - @not_rpython - def isnan(v): - return v != v - - @not_rpython - def copysign(x, y): - """Return x with the sign of y""" - if x < 0.: - x = -x - if y > 0. or (y == 0. and math.atan2(y, -1.) > 0.): - return x - else: - return -x - - _2_to_m28 = 3.7252902984619141E-09; # 2**-28 - _2_to_p28 = 268435456.0; # 2**28 - _ln2 = 6.93147180559945286227E-01 - - @not_rpython - def acosh(x): - if isnan(x): - return NAN - if x < 1.: - raise ValueError("math domain error") - if x >= _2_to_p28: - if isinf(x): - return x - else: - return math.log(x) + _ln2 - if x == 1.: - return 0. - if x >= 2.: - t = x * x - return math.log(2. * x - 1. / (x + math.sqrt(t - 1.0))) - t = x - 1.0 - return log1p(t + math.sqrt(2. * t + t * t)) - - @not_rpython - def asinh(x): - absx = abs(x) - if not isfinite(x): - return x - if absx < _2_to_m28: - return x - if absx > _2_to_p28: - w = math.log(absx) + _ln2 - elif absx > 2.: - w = math.log(2. * absx + 1. / (math.sqrt(x * x + 1.) + absx)) - else: - t = x * x - w = log1p(absx + t / (1. + math.sqrt(1. + t))) - return copysign(w, x) - - @not_rpython - def atanh(x): - if isnan(x): - return x - absx = abs(x) - if absx >= 1.: - raise ValueError("math domain error") - if absx < _2_to_m28: - return x - if absx < .5: - t = absx + absx - t = .5 * log1p(t + t * absx / (1. - absx)) - else: - t = .5 * log1p((absx + absx) / (1. - absx)) - return copysign(t, x) - - @not_rpython - def log1p(x): - if abs(x) < DBL_EPSILON // 2.: - return x - elif -.5 <= x <= 1.: - y = 1. 
+ x - return math.log(y) - ((y - 1.) - x) / y - else: - return math.log(1. + x) - -try: - from math import expm1 # Added in Python 2.7. -except ImportError: - @not_rpython - def expm1(x): - if abs(x) < .7: - u = math.exp(x) - if u == 1.: - return x - return (u - 1.) * x / math.log(u) - return math.exp(x) - 1. def log2(x): # Uses an algorithm that should: diff --git a/rpython/translator/sandbox/test/test_sandbox.py b/rpython/translator/sandbox/test/test_sandbox.py --- a/rpython/translator/sandbox/test/test_sandbox.py +++ b/rpython/translator/sandbox/test/test_sandbox.py @@ -65,6 +65,24 @@ f.close() assert tail == "" +def test_open_dup_rposix(): + from rpython.rlib import rposix + def entry_point(argv): + fd = rposix.open("/tmp/foobar", os.O_RDONLY, 0777) + assert fd == 77 + fd2 = rposix.dup(fd) + assert fd2 == 78 + return 0 + + exe = compile(entry_point) + g, f = run_in_subprocess(exe) + expect(f, g, "ll_os.ll_os_open", ("/tmp/foobar", os.O_RDONLY, 0777), 77) + expect(f, g, "ll_os.ll_os_dup", (77, True), 78) + g.close() + tail = f.read() + f.close() + assert tail == "" + def test_read_write(): def entry_point(argv): fd = os.open("/tmp/foobar", os.O_RDONLY, 0777) diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -67,7 +67,8 @@ if self.annotator is not None: raise ValueError("we already have an annotator") from rpython.annotator.annrpython import RPythonAnnotator - self.annotator = RPythonAnnotator(self, policy=policy) + self.annotator = RPythonAnnotator( + self, policy=policy, keepgoing=self.config.translation.keepgoing) self.annotator.allow_bad_unions = self.config.translation.brokentypes return self.annotator From pypy.commits at gmail.com Sat Nov 18 23:33:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:22 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: translation fixes Message-ID: <5a110992.31a9df0a.13a2b.796c@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93079:839732d2f9d2 Date: 2016-11-24 02:25 +0000 http://bitbucket.org/pypy/pypy/changeset/839732d2f9d2/ Log: translation fixes diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -894,7 +894,7 @@ @simple_unary_op def exp2(self, v): try: - return math.pow(2, v) + return math.pow(2., v) except OverflowError: return rfloat.INFINITY @@ -1587,7 +1587,7 @@ @complex_unary_op def exp2(self, v): try: - return rcomplex.c_pow((2,0), v) + return rcomplex.c_pow((2., 0.), v) except OverflowError: return rfloat.INFINITY, rfloat.NAN except ValueError: @@ -1752,7 +1752,7 @@ try: return rcomplex.c_log(v[0] + 1, v[1]) except OverflowError: - return -rfloat.INFINITY, 0 + return -rfloat.INFINITY, 0. 
except ValueError: return rfloat.NAN, rfloat.NAN diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -365,7 +365,7 @@ value += 2 * half_eps mant_dig = rfloat.DBL_MANT_DIG if (top_exp == rfloat.DBL_MAX_EXP and - value == math.ldexp(2 * half_eps, mant_dig)): + value == math.ldexp(2 * float(half_eps), mant_dig)): raise oefmt(space.w_OverflowError, "too large") value = math.ldexp(value, (exp + 4*key_digit)) while i < length and s[i].isspace(): From pypy.commits at gmail.com Sat Nov 18 23:54:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:54:53 -0800 (PST) Subject: [pypy-commit] pypy default: Fix yet another misuse of py.test.skip Message-ID: <5a110e9d.d18d1c0a.1f0fd.0065@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93082:e5808d8c24ff Date: 2017-11-19 04:54 +0000 http://bitbucket.org/pypy/pypy/changeset/e5808d8c24ff/ Log: Fix yet another misuse of py.test.skip diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -2,7 +2,7 @@ from rpython.rtyper.test.test_llinterp import interpret from rpython.rlib.rarithmetic import * from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError -from hypothesis import given, strategies +from hypothesis import given, strategies, assume import sys import py @@ -404,8 +404,11 @@ def test_int_c_div_mod(x, y): assert int_c_div(~x, y) == -(abs(~x) // y) assert int_c_div( x,-y) == -(x // y) - if (x, y) == (sys.maxint, 1): - py.test.skip("would overflow") + + at given(strategies.integers(min_value=0, max_value=sys.maxint), + strategies.integers(min_value=1, max_value=sys.maxint)) +def test_int_c_div_mod_2(x, y): + assume((x, y) != (sys.maxint, 1)) # This case would overflow assert int_c_div(~x,-y) == +(abs(~x) // y) for x1 in [x, ~x]: for y1 in [y, -y]: From pypy.commits at gmail.com Sun Nov 19 04:20:55 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 01:20:55 -0800 (PST) Subject: [pypy-commit] pypy default: Fix this fragile test (it broke because of 167b802baf3b, unsure why) Message-ID: <5a114cf7.49c71c0a.ec657.3c99@mx.google.com> Author: Armin Rigo Branch: Changeset: r93083:eb297be15f48 Date: 2017-11-19 10:20 +0100 http://bitbucket.org/pypy/pypy/changeset/eb297be15f48/ Log: Fix this fragile test (it broke because of 167b802baf3b, unsure why) diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -105,7 +105,7 @@ assert importlock.count == 0 # A new module importhook(space, 're') - assert importlock.count == 9 + assert importlock.count >= 9 # Import it again previous_count = importlock.count importhook(space, 're') From pypy.commits at gmail.com Sun Nov 19 12:38:52 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 19 Nov 2017 09:38:52 -0800 (PST) Subject: [pypy-commit] pypy default: unbreak macos build Message-ID: <5a11c1ac.17361c0a.271b9.0487@mx.google.com> Author: Matti Picus Branch: Changeset: r93084:a306385caebf Date: 2017-11-19 19:37 +0200 http://bitbucket.org/pypy/pypy/changeset/a306385caebf/ Log: unbreak macos build diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -69,7 +69,7 @@ post_include_bits=[], 
compile_extra=compile_extra ) -if sys.platform.startswith('linux'): +if sys.platform != 'win32': eci_kwds['separate_module_files'].append( SHARED.join('vmprof_mt.c'), ) From pypy.commits at gmail.com Sun Nov 19 17:07:37 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 14:07:37 -0800 (PST) Subject: [pypy-commit] pypy default: Follow-up for cb9634421fa2: revert the very general change and instead Message-ID: <5a1200a9.5d87df0a.a0b86.a0fb@mx.google.com> Author: Armin Rigo Branch: Changeset: r93085:d00a16ef468f Date: 2017-11-19 23:04 +0100 http://bitbucket.org/pypy/pypy/changeset/d00a16ef468f/ Log: Follow-up for cb9634421fa2: revert the very general change and instead improve the logic at the point where it matters, with a comment. diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): - return bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.pendingblocks = {} # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -216,8 +192,15 @@ def complete_pending_blocks(self): while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + # Grab all blocks from 'self.pendingblocks' in a list, and + # walk that list. This prevents a situation where the same + # block is added over and over again to 'self.pendingblocks' + # and the code here would pop that same block from the dict + # over and over again, without ever looking at other blocks. + all_blocks = self.pendingblocks.keys() + for block in all_blocks: + graph = self.pendingblocks.pop(block) + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" From pypy.commits at gmail.com Sun Nov 19 18:33:13 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 15:33:13 -0800 (PST) Subject: [pypy-commit] pypy default: Tweak: 16% speed increase of PyPy annotation, by avoiding the Message-ID: <5a1214b9.169a1c0a.7c8af.d8dc@mx.google.com> Author: Armin Rigo Branch: Changeset: r93086:60c4fa1b0539 Date: 2017-11-20 00:32 +0100 http://bitbucket.org/pypy/pypy/changeset/60c4fa1b0539/ Log: Tweak: 16% speed increase of PyPy annotation, by avoiding the situation where a block is rescheduled many many times before it is finally resolved (e.g. because resolving it requires (re)flowing through a very long chain of blocks). 
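For illustration only, a minimal, self-contained sketch of the scheduling idea this log entry describes (all names here are invented; the actual annotator change is in the diff that follows): pending work is kept in per-generation buckets, and anything re-queued while processing goes into the next bucket, so one frequently re-added block cannot keep the rest of the queue waiting.

    # hypothetical sketch, not the annotator code shown below
    def drain(buckets, step):
        while any(buckets):
            gen = [i for i, bucket in enumerate(buckets) if bucket][0]
            if len(buckets) == gen + 1:
                buckets.append({})                 # room for re-scheduled work
            current = buckets[gen]
            while current:
                item, state = current.popitem()
                for new_item, new_state in step(item, state):
                    buckets[gen + 1][new_item] = new_state   # next generation only

    if __name__ == '__main__':
        # toy step(): item n re-schedules n - 1 until it reaches 0
        drain([{5: None}], lambda n, _: [(n - 1, None)] if n else [])
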
diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -33,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -57,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -188,18 +188,39 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - # Grab all blocks from 'self.pendingblocks' in a list, and - # walk that list. This prevents a situation where the same - # block is added over and over again to 'self.pendingblocks' - # and the code here would pop that same block from the dict - # over and over again, without ever looking at other blocks. 
- all_blocks = self.pendingblocks.keys() - for block in all_blocks: - graph = self.pendingblocks.pop(block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen self.processblock(graph, block) def complete(self): @@ -207,7 +228,7 @@ while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -393,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX From pypy.commits at gmail.com Mon Nov 20 05:14:36 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:14:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start working on more obscure codecs and completely remove hacks that go via UCS4 from unicodehelper. Now unicodehelper no longer uses runicode Message-ID: <5a12ab0c.01a4df0a.acba3.439b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93087:3e5acb0a1e81 Date: 2017-11-20 11:13 +0100 http://bitbucket.org/pypy/pypy/changeset/3e5acb0a1e81/ Log: start working on more obscure codecs and completely remove hacks that go via UCS4 from unicodehelper. 
Now unicodehelper no longer uses runicode diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,9 @@ +import sys + from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode, rutf8 -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -168,47 +170,6 @@ r = res.build() return r -class DecodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s, pos, endpos) - -class EncodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) - -def setup_new_encoders_legacy(encoding): - encoder_name = 'utf8_encode_' + encoding - encoder_call_name = 'unicode_encode_' + encoding - decoder_name = 'str_decode_' + encoding - def encoder(utf8, errors, errorhandler): - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return getattr(runicode, encoder_call_name)(u, len(u), errors, - w.handle) - def decoder(s, slen, errors, final, errorhandler): - w = DecodeWrapper((errorhandler)) - u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) - encoder.__name__ = encoder_name - decoder.__name__ = decoder_name - if encoder_name not in globals(): - globals()[encoder_name] = encoder - if decoder_name not in globals(): - globals()[decoder_name] = decoder - -def setup(): - for encoding in ['utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', - 'utf_32_be', 'unicode_internal']: - setup_new_encoders_legacy(encoding) - -setup() - def utf8_encode_ascii(utf8, errors, errorhandler): """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" or an obscure errorhandler @@ -618,6 +579,41 @@ lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag + +TABLE = '0123456789abcdef' + +def raw_unicode_escape_helper(result, char): + if char >= 0x10000 or char < 0: + result.append("\\U") + zeros = 8 + elif char >= 0x100: + result.append("\\u") + zeros = 4 + else: + result.append("\\x") + zeros = 2 + for i in range(zeros-1, -1, -1): + result.append(TABLE[(char >> (4 * i)) & 0x0f]) + +def utf8_encode_raw_unicode_escape(s, errors, errorhandler=None): + # errorhandler is not used: this function cannot cause Unicode errors + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + oc = ord(s[pos]) + + if oc < 0x100: + result.append(chr(oc)) + else: + raw_unicode_escape_helper(result, oc) + pos += 1 + + return result.build() + + # ____________________________________________________________ # utf-7 @@ -896,3 +892,395 @@ result.append('-') return result.build() + +# ____________________________________________________________ +# utf-16 + +BYTEORDER = sys.byteorder +BYTEORDER2 = BYTEORDER[0] + 'e' # either "le" or "be" +assert BYTEORDER2 in ('le', 'be') + +def str_decode_utf_16(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "native") + return result, c, lgt, flag + +def str_decode_utf_16_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "big") + return result, c, lgt, flag + +def str_decode_utf_16_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "little") + return result, c, lgt, flag + +def str_decode_utf_16_helper(s, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf16'): + size = len(s) + bo = 0 + + if BYTEORDER == 'little': + ihi = 1 + ilo = 0 + else: + ihi = 0 + ilo = 1 + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 2: + bom = (ord(s[ihi]) << 8) | ord(s[ilo]) + if BYTEORDER == 'little': + if bom == 0xFEFF: + pos += 2 + bo = -1 + elif bom == 0xFFFE: + pos += 2 + bo = 1 + else: + if bom == 0xFEFF: + pos += 2 + bo = 1 + elif bom == 0xFFFE: + pos += 2 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + ihi = 1 + ilo = 0 + + elif bo == 1: + # force big endian + ihi = 0 + ilo = 1 + + result = StringBuilder(size // 2) + + #XXX I think the errors are not correctly handled here + while pos < size: + # remaining bytes at the end? 
(size should be even) + if len(s) - pos < 2: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 2: + break + ch = (ord(s[pos + ihi]) << 8) | ord(s[pos + ilo]) + pos += 2 + if ch < 0xD800 or ch > 0xDFFF: + rutf8.unichr_as_utf8_append(result, ch) + continue + # UTF-16 code pair: + if len(s) - pos < 2: + pos -= 2 + if not final: + break + errmsg = "unexpected end of data" + r, pos = errorhandler(errors, public_encoding_name, + errmsg, s, pos, len(s)) + result.append(r) + if len(s) - pos < 2: + break + elif 0xD800 <= ch <= 0xDBFF: + ch2 = (ord(s[pos+ihi]) << 8) | ord(s[pos+ilo]) + pos += 2 + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, ch) + continue + else: + r, pos = errorhandler(errors, public_encoding_name, + "illegal UTF-16 surrogate", + s, pos - 4, pos - 2) + result.append(r) + else: + r, pos = errorhandler(errors, public_encoding_name, + "illegal encoding", + s, pos - 2, pos) + result.append(r) + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, flag, bo + +def _STORECHAR(result, CH, byteorder): + hi = chr(((CH) >> 8) & 0xff) + lo = chr((CH) & 0xff) + if byteorder == 'little': + result.append(lo) + result.append(hi) + else: + result.append(hi) + result.append(lo) + +def unicode_encode_utf_16_helper(s, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf16'): + size = len(s) + if size == 0: + if byteorder == 'native': + result = StringBuilder(2) + _STORECHAR(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 2 + 2) + if byteorder == 'native': + _STORECHAR(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + pos = rutf8.next_codepoint_pos(s, pos) + + if ch < 0xD800: + _STORECHAR(result, ch, byteorder) + elif ch >= 0x10000: + _STORECHAR(result, 0xD800 | ((ch-0x10000) >> 10), byteorder) + _STORECHAR(result, 0xDC00 | ((ch-0x10000) & 0x3FF), byteorder) + elif ch >= 0xE000 or allow_surrogates: + _STORECHAR(result, ch, byteorder) + else: + ru, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + xxx + #if rs is not None: + # # py3k only + # if len(rs) % 2 != 0: + # errorhandler('strict', public_encoding_name, + # 'surrogates not allowed', + # s, pos-1, pos) + # result.append(rs) + # continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + + return result.build() + +def utf8_encode_utf_16(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "native") + +def utf8_encode_utf_16_be(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "big") + +def utf8_encode_utf_16_le(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "little") + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "native") + 
return result, c, lgt, flag + +def str_decode_utf_32_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "big") + return result, c, lgt, flag + +def str_decode_utf_32_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "little") + return result, c, lgt, flag + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + bo = 0 + size = len(s) + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = StringBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? (size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + rutf8.unichr_as_utf8_append(result, ch) + pos += 4 + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + size = len(s) + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + pos = rutf8.next_codepoint_pos(s, pos) + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + XXX + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler('strict', 
public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def utf8_encode_utf_32(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "native") + +def utf8_encode_utf_32_be(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "big") + +def utf8_encode_utf_32_le(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "little") diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -30,6 +30,10 @@ endpos): """Generic wrapper for calling into error handlers. + Note that error handler receives and returns position into + the unicode characters, not into the position of utf8 bytes, + so it needs to be converted by the codec + Returns (unicode_or_none, str_or_none, newpos) as error handlers may return unicode or on Python 3, bytes. """ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -15,6 +15,7 @@ 'utf-32', 'utf-32-le', 'utf-32-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + print encoding assert unicode(u.encode(encoding),encoding) == u def test_ucs4(self): @@ -115,10 +116,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1194,7 +1194,7 @@ assert False, "always raises" return space.newbytes(s) if ((encoding is None and space.sys.defaultencoding == 'utf8') or - encoding == 'utf-8' or encoding == 'utf8'): + encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'): return space.newbytes(space.utf8_w(w_object)) if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec From pypy.commits at gmail.com Mon Nov 20 05:44:35 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 20 Nov 2017 02:44:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Rename this directory to avoid name conflict with "rutf8.py" Message-ID: <5a12b213.c2311c0a.b2cae.6fe9@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93088:0a3c81c3f67d Date: 2017-11-20 11:44 +0100 http://bitbucket.org/pypy/pypy/changeset/0a3c81c3f67d/ Log: Rename this directory to avoid name conflict with "rutf8.py" diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/fastutf8/capi.py rename from rpython/rlib/rutf8/capi.py rename to rpython/rlib/fastutf8/capi.py diff --git a/rpython/rlib/rutf8/src/utf8-avx.c 
b/rpython/rlib/fastutf8/src/utf8-avx.c rename from rpython/rlib/rutf8/src/utf8-avx.c rename to rpython/rlib/fastutf8/src/utf8-avx.c diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/fastutf8/src/utf8-scalar.c rename from rpython/rlib/rutf8/src/utf8-scalar.c rename to rpython/rlib/fastutf8/src/utf8-scalar.c diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/fastutf8/src/utf8-sse4.c rename from rpython/rlib/rutf8/src/utf8-sse4.c rename to rpython/rlib/fastutf8/src/utf8-sse4.c diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/fastutf8/src/utf8.c rename from rpython/rlib/rutf8/src/utf8.c rename to rpython/rlib/fastutf8/src/utf8.c diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/fastutf8/src/utf8.h rename from rpython/rlib/rutf8/src/utf8.h rename to rpython/rlib/fastutf8/src/utf8.h From pypy.commits at gmail.com Mon Nov 20 05:53:44 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:53:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: in progress Message-ID: <5a12b438.c23a1c0a.17fa4.ddaf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93089:e4a80363506c Date: 2017-11-20 11:52 +0100 http://bitbucket.org/pypy/pypy/changeset/e4a80363506c/ Log: in progress diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,30 +1,35 @@ +from hypothesis import given, strategies + +from rpython.rlib import rutf8 + from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii +from pypy.interpreter import unicodehelper as uh def decode_utf8(u): return str_decode_utf8(u, True, "strict", None) def test_decode_utf8(): - assert decode_utf8("abc") == ("abc", 3) - assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 1) - assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 1) - assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 1) + assert decode_utf8("abc") == ("abc", 3, 3, rutf8.FLAG_ASCII) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1, rutf8.FLAG_REGULAR) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) assert decode_utf8("\xed\xa0\x80\xed\xb0\x80") == ( - "\xed\xa0\x80\xed\xb0\x80", 2) - assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 1) + "\xed\xa0\x80\xed\xb0\x80", 6, 2, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1, rutf8.FLAG_REGULAR) def test_utf8_encode_ascii(): - assert utf8_encode_ascii("abc", 3, "??", "??") == "abc" + assert utf8_encode_ascii("abc", "??", "??") == "abc" def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) return "", end lst = [] input = u"\u1234".encode("utf8") - assert utf8_encode_ascii(input, 1, "??", eh) == "" + assert utf8_encode_ascii(input, "??", eh) == "" assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = u"\u1234\u5678abc\u8765\u4321".encode("utf8") - assert utf8_encode_ascii(input, 7, "??", eh) == "abc" + assert utf8_encode_ascii(input, "??", eh) == "abc" assert lst == [("??", "ascii", input, 0, 2), ("??", "ascii", input, 5, 7)] @@ -46,3 +51,7 @@ ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] + + at given(strategies.binary()) +def test_unicode_raw_escape(s): + 
uh.utf8_encode_raw_unicode_escape(s, 'strict') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -158,7 +158,7 @@ res.append(chr(oc)) i += 1 else: - r, pos = errorhandler(errors, 'latin1', + r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) res.append(r) @@ -189,7 +189,7 @@ res.append(r) else: res.append(chr(ch)) - i = rutf8.next_codepoint_pos(utf8, i) + i = rutf8.next_codepoint_pos(utf8, i) pos += 1 s = res.build() @@ -318,7 +318,7 @@ assert pos - continuation_bytes >= 0 r = res.build() lgt, flag = rutf8.check_utf8(r, True) - return r, pos - continuation_bytes, lgt, flag + return r, pos, lgt, flag hexdigits = "0123456789ABCDEFabcdef" @@ -362,7 +362,7 @@ flag = rutf8.FLAG_REGULAR pos += digits size = 1 - + return pos, size, flag def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): From pypy.commits at gmail.com Mon Nov 20 05:53:46 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:53:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a12b43a.c23a1c0a.17fa4.ddb5@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93090:78c8a9571b3d Date: 2017-11-20 11:53 +0100 http://bitbucket.org/pypy/pypy/changeset/78c8a9571b3d/ Log: merge diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/fastutf8/capi.py rename from rpython/rlib/rutf8/capi.py rename to rpython/rlib/fastutf8/capi.py diff --git a/rpython/rlib/rutf8/src/utf8-avx.c b/rpython/rlib/fastutf8/src/utf8-avx.c rename from rpython/rlib/rutf8/src/utf8-avx.c rename to rpython/rlib/fastutf8/src/utf8-avx.c diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/fastutf8/src/utf8-scalar.c rename from rpython/rlib/rutf8/src/utf8-scalar.c rename to rpython/rlib/fastutf8/src/utf8-scalar.c diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/fastutf8/src/utf8-sse4.c rename from rpython/rlib/rutf8/src/utf8-sse4.c rename to rpython/rlib/fastutf8/src/utf8-sse4.c diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/fastutf8/src/utf8.c rename from rpython/rlib/rutf8/src/utf8.c rename to rpython/rlib/fastutf8/src/utf8.c diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/fastutf8/src/utf8.h rename from rpython/rlib/rutf8/src/utf8.h rename to rpython/rlib/fastutf8/src/utf8.h From pypy.commits at gmail.com Mon Nov 20 07:57:36 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 04:57:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: * Improve ascii/utf8 codecs and unicode escape Message-ID: <5a12d140.08a5df0a.f1c99.7558@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93091:4668380f4c79 Date: 2017-11-20 13:56 +0100 http://bitbucket.org/pypy/pypy/changeset/4668380f4c79/ Log: * Improve ascii/utf8 codecs and unicode escape * Raise instead of looping infinitely when errorhandler returns nonsense diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -33,25 +33,33 @@ assert lst == [("??", "ascii", input, 0, 2), ("??", "ascii", input, 5, 7)] + at given(strategies.text()) +def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" 
* (end - start), end + + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") + def test_str_decode_ascii(): - assert str_decode_ascii("abc", 3, "??", True, "??") == ("abc", 3, 3) + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3, rutf8.FLAG_ASCII) def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) - return u"\u1234\u5678", end + return u"\u1234\u5678".encode("utf8"), end lst = [] input = "\xe8" exp = u"\u1234\u5678".encode("utf8") - assert str_decode_ascii(input, 1, "??", True, eh) == (exp, 1, 2) + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2, rutf8.FLAG_REGULAR) assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = "\xe8\xe9abc\xea\xeb" - assert str_decode_ascii(input, 7, "??", True, eh) == ( - exp + exp + "abc" + exp + exp, 7, 11) + assert str_decode_ascii(input, "??", True, eh) == ( + exp + exp + "abc" + exp + exp, 7, 11, rutf8.FLAG_REGULAR) assert lst == [("??", "ascii", input, 0, 1), ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] - at given(strategies.binary()) -def test_unicode_raw_escape(s): - uh.utf8_encode_raw_unicode_escape(s, 'strict') + at given(strategies.text()) +def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') + assert r == u.encode("raw-unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -158,6 +158,7 @@ res.append(chr(oc)) i += 1 else: + XXX r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) @@ -179,10 +180,15 @@ pos = 0 while i < len(utf8): ch = rutf8.codepoint_at_pos(utf8, i) - if ch >= 0x7F: + if ch > 0x7F: + endpos = pos + 1 + end_i = rutf8.next_codepoint_pos(utf8, i) + while end_i < len(utf8) and rutf8.codepoint_at_pos(utf8, end_i) > 0x7F: + endpos += 1 + end_i = rutf8.next_codepoint_pos(utf8, end_i) msg = "ordinal not in range(128)" r, newpos = errorhandler(errors, 'ascii', msg, utf8, - pos, pos + 1) + pos, endpos) for _ in range(newpos - pos): i = rutf8.next_codepoint_pos(utf8, i) pos = newpos @@ -603,13 +609,13 @@ result = StringBuilder(size) pos = 0 while pos < size: - oc = ord(s[pos]) + oc = rutf8.codepoint_at_pos(s, pos) if oc < 0x100: result.append(chr(oc)) else: raw_unicode_escape_helper(result, oc) - pos += 1 + pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -71,6 +71,9 @@ raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) + if newpos < startpos: + raise oefmt(space.w_IndexError, + "position %d from error handler did not progress", newpos) w_replace = space.convert_to_w_unicode(w_replace) return w_replace._utf8, newpos return call_errorhandler From pypy.commits at gmail.com Mon Nov 20 08:20:19 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 05:20:19 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Test and fix for int rbinop overflow to long, also add a deeper test for int_floordiv Message-ID: <5a12d693.4a371c0a.3387f.acb3@mx.google.com> Author: stian Branch: math-improvements Changeset: r93092:e6c9af023bc5 Date: 2017-11-20 14:19 +0100 http://bitbucket.org/pypy/pypy/changeset/e6c9af023bc5/ Log: Test and fix for int rbinop overflow 
to long, also add a deeper test for int_floordiv diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py --- a/pypy/objspace/std/intobject.py +++ b/pypy/objspace/std/intobject.py @@ -589,7 +589,7 @@ try: return func(space, y, x) except OverflowError: - return ovf2long(space, y, x, w_other) + return ovf2long(space, y, x, self) else: return func(space, y, x) diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py --- a/pypy/objspace/std/test/test_intobject.py +++ b/pypy/objspace/std/test/test_intobject.py @@ -613,6 +613,9 @@ assert type(x) is int assert str(x) == "0" + def test_rbinop_overflow(self): + x = int(321) + assert x.__rlshift__(333) == 1422567365923326114875084456308921708325401211889530744784729710809598337369906606315292749899759616L class AppTestIntShortcut(AppTestInt): spaceconfig = {"objspace.std.intshortcut": True} diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -70,6 +70,15 @@ r2 = r.int_floordiv(10) assert r2.tolong() == 100L + for op1 in gen_signs(long_vals): + for op2 in gen_signs(long_vals): + if not op2 or op2 >= (1 << SHIFT) or op2 <= -(1 << SHIFT): + continue + rl_op1 = rbigint.fromlong(op1) + r1 = rl_op1.int_floordiv(op2) + r2 = op1 // op2 + assert r1.tolong() == r2 + assert py.test.raises(ZeroDivisionError, r.int_floordiv, 0) # Error pointed out by Armin Rigo From pypy.commits at gmail.com Mon Nov 20 08:33:28 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 05:33:28 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Add test for overflow with regular binops too, now there should be test for all changes to intobject Message-ID: <5a12d9a8.54d91c0a.cb2a3.8fd0@mx.google.com> Author: stian Branch: math-improvements Changeset: r93093:89a762f37f25 Date: 2017-11-20 14:32 +0100 http://bitbucket.org/pypy/pypy/changeset/89a762f37f25/ Log: Add test for overflow with regular binops too, now there should be test for all changes to intobject diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py --- a/pypy/objspace/std/test/test_intobject.py +++ b/pypy/objspace/std/test/test_intobject.py @@ -613,6 +613,10 @@ assert type(x) is int assert str(x) == "0" + def test_binop_overflow(self): + x = int(2) + assert x.__lshift__(128) == 680564733841876926926749214863536422912L + def test_rbinop_overflow(self): x = int(321) assert x.__rlshift__(333) == 1422567365923326114875084456308921708325401211889530744784729710809598337369906606315292749899759616L From pypy.commits at gmail.com Mon Nov 20 09:02:15 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 06:02:15 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Test for int_pow, test+fix for pow ValueError with third argument as 0 Message-ID: <5a12e067.c23a1c0a.17fa4.1a69@mx.google.com> Author: stian Branch: math-improvements Changeset: r93094:9291ee92df89 Date: 2017-11-20 15:01 +0100 http://bitbucket.org/pypy/pypy/changeset/9291ee92df89/ Log: Test for int_pow, test+fix for pow ValueError with third argument as 0 diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py --- a/pypy/objspace/std/test/test_longobject.py +++ b/pypy/objspace/std/test/test_longobject.py @@ -192,6 +192,12 @@ assert pow(x, 0L, 1L) == 0L assert pow(-1L, -1L) == -1.0 + def test_int_pow(self): + x = 2L + assert pow(x, 2) == 4L + assert pow(x, 2, 2) == 0L + 
assert pow(x, 2, 3L) == 1L + def test_getnewargs(self): assert 0L .__getnewargs__() == (0L,) assert (-1L) .__getnewargs__() == (-1L,) diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -987,9 +987,7 @@ size_b = UDIGIT_TYPE(b.numdigits()) - if b.sign == 0: - return ONERBIGINT - elif c is not None: + if c is not None: if c.sign == 0: raise ValueError("pow() 3rd argument cannot be 0") @@ -1016,6 +1014,8 @@ # so we only do it when it buys something. if a.sign < 0 or a.numdigits() > c.numdigits(): a = a.mod(c) + elif b.sign == 0: + return ONERBIGINT elif a.sign == 0: return NULLRBIGINT elif size_b == 1: @@ -1124,9 +1124,7 @@ raise ValueError("bigint pow() too negative") assert b >= 0 - if b == 0: - return ONERBIGINT - elif c is not None: + if c is not None: if c.sign == 0: raise ValueError("pow() 3rd argument cannot be 0") @@ -1153,6 +1151,8 @@ # so we only do it when it buys something. if a.sign < 0 or a.numdigits() > c.numdigits(): a = a.mod(c) + elif b == 0: + return ONERBIGINT elif a.sign == 0: return NULLRBIGINT elif b == 1: diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -190,7 +190,12 @@ r4 = pow(op1, op2, 1000) print op1, op2 assert r3.tolong() == r4 - + + def test_pow_raises(self): + r1 = rbigint.fromint(2) + r0 = rbigint.fromint(0) + py.test.raises(ValueError, r1.int_pow, 2, r0) + py.test.raises(ValueError, r1.pow, r1, r0) def test_touint(self): result = r_uint(sys.maxint + 42) rl = rbigint.fromint(sys.maxint).add(rbigint.fromint(42)) From pypy.commits at gmail.com Mon Nov 20 09:08:56 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 06:08:56 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Typo in comment Message-ID: <5a12e1f8.08a5df0a.f1c99.8cdd@mx.google.com> Author: stian Branch: math-improvements Changeset: r93095:6ba5b9334842 Date: 2017-11-20 15:08 +0100 http://bitbucket.org/pypy/pypy/changeset/6ba5b9334842/ Log: Typo in comment diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2120,7 +2120,7 @@ assert vv >= 0 assert wm1 >= 1 q = vv / wm1 - r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. + r = vv % wm1 # This seems to be slightly faster on widen digits than vv - wm1 * q. 
vj2 = v.digit(abs(j-2)) while wm2 * q > ((r << SHIFT) | vj2): q -= 1 From pypy.commits at gmail.com Mon Nov 20 09:15:52 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 06:15:52 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fixes Message-ID: <5a12e398.7a86df0a.46775.bd55@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93096:d17afc06eedf Date: 2017-11-20 15:15 +0100 http://bitbucket.org/pypy/pypy/changeset/d17afc06eedf/ Log: fixes diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -63,3 +63,8 @@ def test_unicode_raw_escape(u): r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') assert r == u.encode("raw-unicode-escape") + + at given(strategies.text()) +def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict") + assert r == u.encode("unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -23,13 +23,12 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, + def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - # XXX fix once we stop using runicode.py - flag = _get_flag(u.decode('utf8')) + u_len, flag = rutf8.check_utf8(utf8) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(u, u_len, flag), + space.newutf8(utf8, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -578,13 +577,15 @@ digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos = hexescape(result, s, pos, digits, + pos, _, _ = hexescape(result, s, pos, digits, "rawunicodeescape", errorhandler, message, errors) r = result.build() lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag +_utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() + TABLE = '0123456789abcdef' @@ -620,6 +621,9 @@ return result.build() +def utf8_encode_unicode_escape(s, errors): + return _utf8_encode_unicode_escape(s) + # ____________________________________________________________ # utf-7 From pypy.commits at gmail.com Mon Nov 20 10:11:17 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 20 Nov 2017 07:11:17 -0800 (PST) Subject: [pypy-commit] pypy default: call register_code to profile functions, cleanup Message-ID: <5a12f095.038b1c0a.348ad.47a8@mx.google.com> Author: Matti Picus Branch: Changeset: r93097:d632e9ca79ae Date: 2017-11-20 17:09 +0200 http://bitbucket.org/pypy/pypy/changeset/d632e9ca79ae/ Log: call register_code to profile functions, cleanup diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, 
but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): From pypy.commits at gmail.com Mon Nov 20 10:32:29 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:32:29 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: silence a warning on OS X Message-ID: <5a12f58d.5d87df0a.a0b86.4f16@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93098:d18dd16d58c7 Date: 2017-11-20 16:31 +0100 http://bitbucket.org/pypy/pypy/changeset/d18dd16d58c7/ Log: silence a warning on OS X diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1867,7 +1867,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): From pypy.commits at gmail.com Mon Nov 20 10:43:40 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:43:40 -0800 (PST) Subject: [pypy-commit] pypy default: silence a warning on OS X Message-ID: <5a12f82c.8b951c0a.cd426.8d23@mx.google.com> Author: fijal Branch: Changeset: r93099:6c9c3791d06a Date: 2017-11-20 16:31 +0100 http://bitbucket.org/pypy/pypy/changeset/6c9c3791d06a/ Log: silence a warning on OS X diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1881,7 +1881,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): From pypy.commits at gmail.com Mon Nov 20 10:43:42 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:43:42 -0800 (PST) Subject: [pypy-commit] pypy default: merge Message-ID: <5a12f82e.cf2f1c0a.40dce.6652@mx.google.com> Author: fijal Branch: Changeset: r93100:21fd35c44d66 Date: 2017-11-20 16:42 +0100 http://bitbucket.org/pypy/pypy/changeset/21fd35c44d66/ Log: merge diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): - return 
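The two "silence a warning on OS X" changesets above and below (r93098 on unicode-utf8, r93099 on default) make the same one-line change: the warning, most likely about calling chroot() without a visible declaration, goes away once the generated C code includes unistd.h, which is the header that declares chroot(). A minimal sketch of the rffi pattern involved, written with the public rffi.llexternal helper rather than rposix's module-local external() wrapper, so treat the names below as illustrative rather than the exact code in rposix.py:

    from rpython.rtyper.lltypesystem import rffi
    from rpython.translator.tool.cbuild import ExternalCompilationInfo

    # unistd.h gives the C compiler a prototype for chroot(), so the
    # generated C source no longer triggers a warning on OS X.
    chroot_eci = ExternalCompilationInfo(includes=['unistd.h'])
    c_chroot = rffi.llexternal('chroot', [rffi.CCHARP], rffi.INT,
                               compilation_info=chroot_eci,
                               save_err=rffi.RFFI_SAVE_ERRNO)

The same ExternalCompilationInfo(includes=[...]) argument can be attached to any llexternal declaration whose C function would otherwise be called without its header in scope.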
bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -81,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -212,19 +188,47 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -410,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ 
@rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): From pypy.commits at gmail.com Mon Nov 20 10:55:02 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:55:02 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fixes until we get to formatting problems Message-ID: <5a12fad6.08e31c0a.60024.9008@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93101:f074b4987d57 Date: 2017-11-20 16:54 +0100 http://bitbucket.org/pypy/pypy/changeset/f074b4987d57/ Log: fixes until we get to formatting problems diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1759,20 +1759,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - - @specialize.argtype(1) - def unicode_w(self, w_obj): - return self.utf8_w(w_obj).decode('utf8') - - def realunicode_w(self, w_obj): - return self.realutf8_w(w_obj).decode('utf8') - - def newunicode(self, u): - from pypy.interpreter import unicodehelper - assert isinstance(u, unicode) - # XXX let's disallow that - return self.newutf8(u.encode("utf8"), len(u), unicodehelper._get_flag(u)) - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -61,10 +61,10 @@ @given(strategies.text()) def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) assert r == u.encode("raw-unicode-escape") @given(strategies.text()) def test_unicode_escape(u): - r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict") + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) assert r == u.encode("unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -60,14 +60,12 @@ return True return False -def _get_flag(u): - flag = rutf8.FLAG_ASCII - for c in u: - if 0xD800 <= ord(c) <= 0xDFFF: - return rutf8.FLAG_HAS_SURROGATES - if ord(c) >= 0x80: - flag = rutf8.FLAG_REGULAR - return flag +def get_flag_from_code(oc): + if oc <= 0x7F: + return rutf8.FLAG_ASCII + if 0xD800 <= oc <= 0xDFFF: + return rutf8.FLAG_HAS_SURROGATES + 
return rutf8.FLAG_REGULAR # These functions take and return unwrapped rpython strings def decode_unicode_escape(space, string): @@ -134,7 +132,11 @@ return ress, len(s), lgt, flag def str_decode_latin_1(s, errors, final, errorhandler): - xxx + try: + rutf8.check_ascii(s) + return s, len(s), len(s), rutf8.FLAG_ASCII + except rutf8.CheckError: + return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) def utf8_encode_latin_1(s, errors, errorhandler): try: @@ -208,7 +210,6 @@ slen = len(s) res = StringBuilder(slen) pos = 0 - continuation_bytes = 0 end = len(s) while pos < end: ordch1 = ord(s[pos]) @@ -229,6 +230,7 @@ if ordch1 <= 0xDF: if pos >= end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) @@ -243,7 +245,6 @@ continue # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz pos += 1 - continuation_bytes += 1 res.append(chr(ordch1)) res.append(chr(ordch2)) continue @@ -251,6 +252,7 @@ if ordch1 <= 0xEF: if (pos + 2) > end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos + 1) @@ -272,7 +274,6 @@ pos += 2 # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - continuation_bytes += 2 res.append(chr(ordch1)) res.append(chr(ordch2)) res.append(chr(ordch3)) @@ -281,6 +282,7 @@ if ordch1 <= 0xF4: if (pos + 3) > end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) @@ -312,15 +314,12 @@ res.append(chr(ordch2)) res.append(chr(ordch3)) res.append(chr(ordch4)) - continuation_bytes += 3 continue r, pos = errorhandler(errors, "utf8", "invalid start byte", s, pos - 1, pos) res.append(r) - assert pos == end - assert pos - continuation_bytes >= 0 r = res.build() lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag @@ -352,19 +351,14 @@ else: # when we get here, chr is a 32-bit unicode character if chr > 0x10ffff: - UUU message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) + size, flag = rutf8.check_utf8(res) builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) - if chr <= 0x7f: - flag = rutf8.FLAG_ASCII - elif 0xd800 <= chr <= 0xdfff: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR + flag = get_flag_from_code(chr) pos += digits size = 1 @@ -508,22 +502,22 @@ builder.append(res) continue pos = look + 1 - XXX - if code <= MAXUNICODE: - builder.append(UNICHR(code)) - else: - code -= 0x10000L - builder.append(unichr(0xD800 + (code >> 10))) - builder.append(unichr(0xDC00 + (code & 0x03FF))) + outsize += 1 + flag = combine_flags(flag, get_flag_from_code(code)) + rutf8.unichr_as_utf8_append(builder, code) else: - YYY res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize builder.append(res) else: - AAA res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize builder.append(res) else: builder.append('\\') @@ -602,7 +596,7 @@ for i in range(zeros-1, -1, -1): result.append(TABLE[(char >> (4 * i)) & 0x0f]) -def utf8_encode_raw_unicode_escape(s, errors, errorhandler=None): +def utf8_encode_raw_unicode_escape(s, errors, errorhandler): # errorhandler is not used: this function cannot cause Unicode errors size = len(s) if size == 0: @@ -621,7 +615,7 @@ return 
result.build() -def utf8_encode_unicode_escape(s, errors): +def utf8_encode_unicode_escape(s, errors, errorhandler): return _utf8_encode_unicode_escape(s) # ____________________________________________________________ @@ -851,7 +845,7 @@ assert final_length >= 0 return result.build()[:final_length], pos, outsize, flag -def utf8_encode_utf_7(s, errors, errorhandler=None): +def utf8_encode_utf_7(s, errors, errorhandler): size = len(s) if size == 0: return '' @@ -1294,3 +1288,153 @@ errorhandler=None, allow_surrogates=True): return unicode_encode_utf_32_helper(s, errors, errorhandler, allow_surrogates, "little") + +# ____________________________________________________________ +# unicode-internal + +def str_decode_unicode_internal(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + unicode_bytes = 4 + if BYTEORDER == "little": + start = 0 + stop = unicode_bytes + step = 1 + else: + start = unicode_bytes - 1 + stop = -1 + step = -1 + + result = StringBuilder(size) + pos = 0 + while pos < size: + if pos > size - unicode_bytes: + res, pos = errorhandler(errors, "unicode_internal", + "truncated input", + s, pos, size) + result.append(res) + if pos > size - unicode_bytes: + break + continue + t = r_uint(0) + h = 0 + for j in range(start, stop, step): + t += r_uint(ord(s[pos + j])) << (h*8) + h += 1 + if t > 0x10ffff: + res, pos = errorhandler(errors, "unicode_internal", + "unichr(%d) not in range" % (t,), + s, pos, pos + unicode_bytes) + result.append(res) + continue + rutf8.unichr_as_utf8_append(result, intmask(t)) + pos += unicode_bytes + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag + +def utf8_encode_unicode_internal(s, errors, errorhandler): + size = len(s) + if size == 0: + return '' + + result = StringBuilder(size * 4) + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if BYTEORDER == "little": + result.append(chr(oc & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 24 & 0xFF)) + else: + result.append(chr(oc >> 24 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc & 0xFF)) + pos = rutf8.next_codepoint_pos(s, pos) + + return result.build() + +# ____________________________________________________________ +# Charmap + +ERROR_CHAR = u'\ufffe'.encode('utf8') + + at specialize.argtype(4) +def str_decode_charmap(s, errors, final=False, + errorhandler=None, mapping=None): + "mapping can be a rpython dictionary, or a dict-like object." 
+ + # Default to Latin-1 + if mapping is None: + return str_decode_latin_1(s, errors, final=final, + errorhandler=errorhandler) + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + pos = 0 + result = StringBuilder(size) + while pos < size: + ch = s[pos] + + c = mapping.get(ch, ERROR_CHAR) + if c == ERROR_CHAR: + r, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, pos + 1) + result.append(r) + continue + result.append(c) + pos += 1 + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag + +def utf8_encode_charmap(s, errors, errorhandler=None, + mapping=None): + YYY + if mapping is None: + return unicode_encode_latin_1(s, size, errors, + errorhandler=errorhandler) + + if errorhandler is None: + errorhandler = default_unicode_error_encode + + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] + + c = mapping.get(ch, '') + if len(c) == 0: + # collect all unencodable chars. Important for narrow builds. + collend = pos + 1 + while collend < size and mapping.get(s[collend], '') == '': + collend += 1 + ru, rs, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, collend) + if rs is not None: + # py3k only + result.append(rs) + continue + for ch2 in ru: + c2 = mapping.get(ch2, '') + if len(c2) == 0: + errorhandler( + "strict", "charmap", + "character maps to ", + s, pos, pos + 1) + result.append(c2) + continue + result.append(c) + pos += 1 + return result.build() + diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,7 +1,6 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder -from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -563,14 +562,14 @@ if space.isinstance_w(w_ch, space.w_unicode): # Charmap may return a unicode string - return space.unicode_w(w_ch) + return space.utf8_w(w_ch) elif space.isinstance_w(w_ch, space.w_int): # Charmap may return a number x = space.int_w(w_ch) if not 0 <= x <= 0x10FFFF: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return code_to_unichr(x) + return rutf8.unichr_as_utf8(x) elif space.is_w(w_ch, space.w_None): # Charmap may return None return errorchar @@ -614,12 +613,13 @@ @unwrap_spec(string='bufferstr', errors='text_or_none') def charmap_decode(space, string, errors="strict", w_mapping=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' if len(string) == 0: - return space.newtuple([space.newunicode(u''), space.newint(0)]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + space.newint(0)]) if space.is_none(w_mapping): mapping = None @@ -628,14 +628,14 @@ final = True state = space.fromcache(CodecState) - result, consumed = runicode.str_decode_charmap( - string, len(string), errors, - final, DecodeWrapper(state.decode_error_handler).handle, mapping) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + result, consumed, lgt, flag = unicodehelper.str_decode_charmap( + string, errors, final, state.decode_error_handler, mapping) + return space.newtuple([space.newutf8(result, lgt, flag), 
+ space.newint(consumed)]) @unwrap_spec(utf8='utf8', errors='text_or_none') def charmap_encode(space, utf8, errors="strict", w_mapping=None): - from pypy.interpreter.unicodehelper import EncodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' @@ -645,10 +645,8 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - uni = utf8.decode('utf8') - result = runicode.unicode_encode_charmap( - uni, len(uni), errors, - EncodeWrapper(state.encode_error_handler).handle, mapping) + result = unicodehelper.unicode_encode_charmap( + utf8, errors, state.encode_error_handler, mapping) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) @@ -707,7 +705,7 @@ @unwrap_spec(errors='text_or_none') def unicode_internal_decode(space, w_string, errors="strict"): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' @@ -718,14 +716,16 @@ string = space.readbuf_w(w_string).as_str() if len(string) == 0: - return space.newtuple([space.newunicode(u''), space.newint(0)]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + space.newint(0)]) final = True state = space.fromcache(CodecState) - result, consumed = runicode.str_decode_unicode_internal( - string, len(string), errors, - final, DecodeWrapper(state.decode_error_handler).handle) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + result, consumed, lgt, flag = unicodehelper.str_decode_unicode_internal( + string, errors, + final, state.decode_error_handler) + return space.newtuple([space.newutf8(result, lgt, flag), + space.newint(consumed)]) # ____________________________________________________________ # support for the "string escape" codec diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -15,7 +15,6 @@ 'utf-32', 'utf-32-le', 'utf-32-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): - print encoding assert unicode(u.encode(encoding),encoding) == u def test_ucs4(self): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -285,7 +285,7 @@ def descr_init(self, space, w_object, w_start, w_end, w_reason): # typechecking - space.realunicode_w(w_object) + space.utf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) @@ -719,7 +719,7 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking space.realtext_w(w_encoding) - space.realunicode_w(w_object) # XXX realutf8()? 
+ space.utf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -432,8 +432,7 @@ def fmt_s(self, w_value): space = self.space - got_unicode = space.isinstance_w(w_value, - space.w_unicode) + got_unicode = space.isinstance_w(w_value, space.w_unicode) if not do_unicode: if got_unicode: raise NeedUnicodeFormattingError diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,9 +164,9 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - from pypy.interpreter import unicodehelper - return self.newutf8(x.encode('utf8'), len(x), - unicodehelper._get_flag(x)) + x = x.encode('utf8') + lgt, flag = rutf8.check_utf8(x, True) + return self.newutf8(x, lgt, flag) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): From pypy.commits at gmail.com Mon Nov 20 13:35:20 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 10:35:20 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Bug-for-bug compatibility (and performance optimisation) in BufferedReader.readinto1() Message-ID: <5a132068.21b9df0a.dcef.7ba2@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93102:feaba8e9bb0a Date: 2017-11-20 18:34 +0000 http://bitbucket.org/pypy/pypy/changeset/feaba8e9bb0a/ Log: Bug-for-bug compatibility (and performance optimisation) in BufferedReader.readinto1() diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -111,14 +111,15 @@ self._unsupportedoperation(space, "detach") def readinto_w(self, space, w_buffer): - return self._readinto(space, w_buffer, "read") + return self._readinto(space, w_buffer, read_once=False) def readinto1_w(self, space, w_buffer): - return self._readinto(space, w_buffer, "read1") + return self._readinto(space, w_buffer, read_once=True) - def _readinto(self, space, w_buffer, methodname): + def _readinto(self, space, w_buffer, read_once): rwbuffer = space.writebuf_w(w_buffer) length = rwbuffer.getlength() + methodname = "read1" if read_once else "read" w_data = space.call_method(self, methodname, space.newint(length)) if not space.isinstance_w(w_data, space.w_bytes): @@ -882,6 +883,52 @@ self._reader_reset_buf() self.state = STATE_OK + def _readinto(self, space, w_buffer, read_once): + rwbuffer = space.writebuf_w(w_buffer) + length = rwbuffer.getlength() + with self.lock: + have = self._readahead() + if have >= length: + rwbuffer.setslice(0, self.buffer[self.pos:self.pos + length]) + return space.newint(length) + written = 0 + if have > 0: + rwbuffer.setslice(0, self.buffer[self.pos:self.read_end]) + written = have + + while written < length: + if self.writable: + self._flush_and_rewind_unlocked(space) + self._reader_reset_buf() + self.pos = 0 + if written + len(self.buffer) < length: + try: + got = self._raw_read(space, rwbuffer, written, length - written) + written += got + except BlockingIOError: + got = 0 + if got == 0: + break + elif read_once and written: + break + else: + try: + have = self._fill_buffer(space) + except BlockingIOError: + have = 0 + if have == 0: + break + endpos = min(have, length - written) + assert endpos >= 0 + rwbuffer.setslice(written, self.buffer[0:endpos]) + written += endpos + self.pos = endpos + if 
read_once: + break + return space.newint(written) + + + W_BufferedReader.typedef = TypeDef( '_io.BufferedReader', W_BufferedIOBase.typedef, __new__ = generic_new_descr(W_BufferedReader), diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -189,6 +189,31 @@ b = bytearray(2) raises(ValueError, bufio.readinto, b) + def test_readinto1(self): + import _io + + class MockIO(_io._IOBase): + def readable(self): + return True + + def readinto(self, buf): + buf[:3] = b"abc" + return 3 + bufio = _io.BufferedReader(MockIO(), buffer_size=5) + buf = bytearray(10) + bufio.read(2) + n = bufio.readinto1(buf) + assert n == 4 + assert buf[:n] == b'cabc' + + # Yes, CPython's observable behavior depends on buffer_size! + bufio = _io.BufferedReader(MockIO(), buffer_size=20) + buf = bytearray(10) + bufio.read(2) + n = bufio.readinto1(buf) + assert n == 1 + assert buf[:n] == b'c' + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Mon Nov 20 13:49:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 10:49:39 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove comment: this test is not supposed to fail any more Message-ID: <5a1323c3.8190df0a.bcb98.95d4@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93103:a5d1206f11e4 Date: 2017-11-20 18:49 +0000 http://bitbucket.org/pypy/pypy/changeset/a5d1206f11e4/ Log: Remove comment: this test is not supposed to fail any more diff --git a/lib-python/3/test/test_io.py b/lib-python/3/test/test_io.py --- a/lib-python/3/test/test_io.py +++ b/lib-python/3/test/test_io.py @@ -1169,12 +1169,7 @@ b = bytearray(2*buffer_size) self.assertEqual(bufio.peek(3), b'fgh') self.assertEqual(rawio._reads, 3) - self.assertEqual(bufio.readinto1(b), 6) # fails because of - # an apparent inconsistency in CPython: readinto1(), if the - # buffered amount is smaller, would always issue one raw read() - # call. This differs from read1(), which if the buffered amount - # if smaller (but more than zero), would just return it without - # any raw read() call. In PyPy both have the behavior of read1(). + self.assertEqual(bufio.readinto1(b), 6) self.assertEqual(b[:6], b"fghjkl") self.assertEqual(rawio._reads, 4) From pypy.commits at gmail.com Mon Nov 20 14:06:31 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 11:06:31 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix tests to match PyPy behaviour Message-ID: <5a1327b7.46901c0a.7be1d.a6b8@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93104:5c2561dd0c89 Date: 2017-11-20 19:06 +0000 http://bitbucket.org/pypy/pypy/changeset/5c2561dd0c89/ Log: Fix tests to match PyPy behaviour diff --git a/lib-python/3/test/test_pydoc.py b/lib-python/3/test/test_pydoc.py --- a/lib-python/3/test/test_pydoc.py +++ b/lib-python/3/test/test_pydoc.py @@ -141,7 +141,7 @@  
Modules - +\x20\x20\x20\x20        
builtins

@@ -878,7 +878,7 @@ @requires_docstrings def test_unbound_builtin_method(self): self.assertEqual(self._get_summary_line(pickle.Pickler.dump), - "dump(self, obj, /)") + "dump(self, obj)") # these no longer include "self" def test_bound_python_method(self): @@ -891,13 +891,13 @@ s = StringIO() p = pickle.Pickler(s) self.assertEqual(self._get_summary_line(p.dump), - "dump(obj, /) method of _pickle.Pickler instance") + "dump(obj) method of pickle._Pickler instance") # this should *never* include self! @requires_docstrings def test_module_level_callable(self): self.assertEqual(self._get_summary_line(os.stat), - "stat(path, *, dir_fd=None, follow_symlinks=True)") + "stat(path, *, dir_fd=-100, follow_symlinks=True)") @unittest.skipUnless(threading, 'Threading required for this test.') From pypy.commits at gmail.com Mon Nov 20 14:25:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 11:25:17 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip tracemalloc tests Message-ID: <5a132c1d.098a1c0a.cf682.6f3d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93105:a84f8ceb8740 Date: 2017-11-20 19:24 +0000 http://bitbucket.org/pypy/pypy/changeset/a84f8ceb8740/ Log: Skip tracemalloc tests diff --git a/lib-python/3/test/test_tracemalloc.py b/lib-python/3/test/test_tracemalloc.py --- a/lib-python/3/test/test_tracemalloc.py +++ b/lib-python/3/test/test_tracemalloc.py @@ -1,7 +1,6 @@ import contextlib import os import sys -import tracemalloc import unittest from unittest.mock import patch from test.support.script_helper import (assert_python_ok, assert_python_failure, @@ -12,6 +11,11 @@ except ImportError: threading = None +try: + import tracemalloc +except ImportError: + raise unittest.SkipTest("tracemalloc is required") + EMPTY_STRING_SIZE = sys.getsizeof(b'') def get_frames(nframe, lineno_delta): From pypy.commits at gmail.com Mon Nov 20 17:06:05 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 14:06:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: work on formatting Message-ID: <5a1351cd.7a86df0a.46775.559e@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93106:b2f3bd9151c0 Date: 2017-11-20 23:05 +0100 http://bitbucket.org/pypy/pypy/changeset/b2f3bd9151c0/ Log: work on formatting diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -25,7 +25,7 @@ # Fast version of the "strict" errors handler. 
def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len, flag = rutf8.check_utf8(utf8) + u_len, flag = rutf8.check_utf8(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), space.newutf8(utf8, u_len, flag), @@ -60,13 +60,6 @@ return True return False -def get_flag_from_code(oc): - if oc <= 0x7F: - return rutf8.FLAG_ASCII - if 0xD800 <= oc <= 0xDFFF: - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - # These functions take and return unwrapped rpython strings def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) @@ -138,6 +131,24 @@ except rutf8.CheckError: return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) +def _str_decode_latin_1_slowpath(s, errors, final, errorhandler): + res = StringBuilder(len(s)) + i = 0 + while i < len(s): + if ord(s[i]) > 0x7F: + while i < len(s) and ord(s[i]) > 0x7F: + rutf8.unichr_as_utf8_append(res, ord(s[i])) + i += 1 + else: + start = i + end = i + 1 + while end < len(s) and ord(s[end]) <= 0x7F: + end += 1 + res.append_slice(s, start, end) + i = end + # cannot be ASCII, cannot have surrogates, I believe + return res.build(), len(s), len(s), rutf8.FLAG_REGULAR + def utf8_encode_latin_1(s, errors, errorhandler): try: rutf8.check_ascii(s) @@ -159,7 +170,6 @@ res.append(chr(oc)) i += 1 else: - XXX r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) @@ -358,7 +368,7 @@ builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) - flag = get_flag_from_code(chr) + flag = rutf8.get_flag_from_code(chr) pos += digits size = 1 @@ -503,7 +513,7 @@ continue pos = look + 1 outsize += 1 - flag = combine_flags(flag, get_flag_from_code(code)) + flag = combine_flags(flag, rutf8.get_flag_from_code(code)) rutf8.unichr_as_utf8_append(builder, code) else: res, pos = errorhandler(errors, "unicodeescape", diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -189,14 +189,17 @@ return new_bytearray(space, w_bytearraytype, []) def descr_reduce(self, space): + from pypy.interpreter.unicodehelper import str_decode_latin_1 + assert isinstance(self, W_BytearrayObject) w_dict = self.getdict(space) if w_dict is None: w_dict = space.w_None + s, _, lgt, flag = str_decode_latin_1(''.join(self.getdata()), 'strict', + True, None) return space.newtuple([ space.type(self), space.newtuple([ - space.newunicode(''.join(self.getdata()).decode('latin-1')), - space.newtext('latin-1')]), + space.newutf8(s, lgt, flag), space.newtext('latin-1')]), w_dict]) @staticmethod diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -1,11 +1,11 @@ """String formatting routines""" import sys -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import INT_MAX from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable from rpython.tool.sourcetools import func_with_new_name @@ -153,18 +153,15 @@ # to build two subclasses of the BaseStringFormatter class, # each one getting its own subtle differences and RPython types. 
- if do_unicode: - const = unicode - else: - const = str - class StringFormatter(BaseStringFormatter): def __init__(self, space, fmt, values_w, w_valuedict): BaseStringFormatter.__init__(self, space, values_w, w_valuedict) - self.fmt = fmt # either a string or a unicode + self.fmt = fmt # always a string, if unicode, utf8 encoded def peekchr(self): - # return the 'current' character + # Return the 'current' character. Note that this returns utf8 + # encoded part, but this is ok since we only need one-character + # comparisons try: return self.fmt[self.fmtpos] except IndexError: @@ -201,7 +198,8 @@ if self.w_valuedict is None: raise oefmt(space.w_TypeError, "format requires a mapping") if do_unicode: - w_key = space.newunicode(key) + lgt, flag = rutf8.check_utf8(key, True) + w_key = space.newutf8(key, lgt, flag) else: w_key = space.newbytes(key) return space.getitem(self.w_valuedict, w_key) @@ -287,10 +285,7 @@ @jit.look_inside_iff(lambda self: jit.isconstant(self.fmt)) def format(self): lgt = len(self.fmt) + 4 * len(self.values_w) + 10 - if do_unicode: - result = UnicodeBuilder(lgt) - else: - result = StringBuilder(lgt) + result = StringBuilder(lgt) self.result = result while True: # fast path: consume as many characters as possible @@ -311,7 +306,7 @@ c = self.peekchr() self.forward() if c == '%': - self.std_wp(const('%')) + self.std_wp('%', False) continue if w_value is None: w_value = self.nextinputvalue() @@ -333,22 +328,27 @@ def unknown_fmtchar(self): space = self.space - c = self.fmt[self.fmtpos - 1] - w_s = space.newunicode(c) if do_unicode else space.newbytes(c) + if do_unicode: + cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) + flag = rutf8.get_flag_from_code(cp) + w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1, flag) + else: + cp = ord(self.fmt[self.fmtpos - 1]) + w_s = space.newbytes(chr(cp)) raise oefmt(space.w_ValueError, "unsupported format character %R (%s) at index %d", - w_s, hex(ord(c)), self.fmtpos - 1) + w_s, hex(cp), self.fmtpos - 1) - @specialize.argtype(1) - def std_wp(self, r): + @specialize.arg(2) + def std_wp(self, r, is_string=False): length = len(r) - if do_unicode and isinstance(r, str): + if do_unicode and is_string: # convert string to unicode using the default encoding - r = self.space.unicode_w(self.space.newbytes(r)) + r = self.space.utf8_w(self.space.newbytes(r)) prec = self.prec if prec == -1 and self.width == 0: # fast path - self.result.append(const(r)) + self.result.append(r) return if prec >= 0 and prec < length: length = prec # ignore the end of the string if too long @@ -358,12 +358,12 @@ padding = 0 assert padding >= 0 if not self.f_ljust and padding > 0: - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # add any padding at the left of 'r' padding = 0 result.append_slice(r, 0, length) # add 'r' itself if padding > 0: - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # add any remaining padding at the right def std_wp_number(self, r, prefix=''): @@ -375,10 +375,10 @@ # result.append(), and no startswith() if not f_sign and # not f_blank). 
if self.f_sign and not r.startswith('-'): - result.append(const('+')) + result.append('+') elif self.f_blank and not r.startswith('-'): - result.append(const(' ')) - result.append(const(r)) + result.append(' ') + result.append(r) return # add a '+' or ' ' sign if necessary sign = r.startswith('-') @@ -405,18 +405,18 @@ assert padding >= 0 if padnumber == '>': - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # pad with spaces on the left if sign: - result.append(const(r[0])) # the sign - result.append(const(prefix)) # the prefix + result.append(r[0]) # the sign + result.append(prefix) # the prefix if padnumber == '0': - result.append_multiple_char(const('0'), padding) + result.append_multiple_char('0', padding) # pad with zeroes - result.append_slice(const(r), int(sign), len(r)) + result.append_slice(r, int(sign), len(r)) # the rest of the number if padnumber == '<': # spaces on the right - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) def string_formatting(self, w_value): space = self.space @@ -425,8 +425,7 @@ raise oefmt(space.w_TypeError, "operand does not support unary str") w_result = space.get_and_call_function(w_impl, w_value) - if space.isinstance_w(w_result, - space.w_unicode): + if space.isinstance_w(w_result, space.w_unicode): raise NeedUnicodeFormattingError return space.bytes_w(w_result) @@ -443,11 +442,11 @@ else: from pypy.objspace.std.unicodeobject import unicode_from_object w_value = unicode_from_object(space, w_value) - s = space.unicode_w(w_value) - self.std_wp(s) + s = space.utf8_w(w_value) + self.std_wp(s, False) def fmt_r(self, w_value): - self.std_wp(self.space.text_w(self.space.repr(w_value))) + self.std_wp(self.space.text_w(self.space.repr(w_value)), True) def fmt_c(self, w_value): self.prec = -1 # just because @@ -456,30 +455,30 @@ s = space.bytes_w(w_value) if len(s) != 1: raise oefmt(space.w_TypeError, "%c requires int or char") - self.std_wp(s) + self.std_wp(s, True) elif space.isinstance_w(w_value, space.w_unicode): if not do_unicode: raise NeedUnicodeFormattingError - ustr = space.unicode_w(w_value) + ustr = space.utf8_w(w_value) if len(ustr) != 1: raise oefmt(space.w_TypeError, "%c requires int or unichar") - self.std_wp(ustr) + self.std_wp(ustr, False) else: n = space.int_w(w_value) if do_unicode: try: - c = unichr(n) + c = rutf8.unichr_as_utf8(n) except ValueError: raise oefmt(space.w_OverflowError, "unicode character code out of range") - self.std_wp(c) + self.std_wp(c, False) else: try: s = chr(n) except ValueError: raise oefmt(space.w_OverflowError, "character code not in range(256)") - self.std_wp(s) + self.std_wp(s, True) return StringFormatter @@ -510,11 +509,12 @@ pass else: return space.newbytes(result) - # XXX for now, this is performance critical - fmt = space.utf8_w(w_fmt).decode("utf8") + fmt = space.utf8_w(w_fmt) formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() - return space.newunicode(result) + # this can force strings, not sure if it's a problem or not + lgt, flag = rutf8.check_utf8(result, True) + return space.newutf8(result, lgt, flag) def mod_format(space, w_format, w_values, do_unicode=False): if space.isinstance_w(w_values, space.w_tuple): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -4,11 +4,12 @@ import string from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib import 
rstring, runicode, rlocale, rfloat, jit +from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rfloat import copysign, formatd from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.signature import Signature +from pypy.interpreter import unicodehelper @specialize.argtype(1) @@ -50,7 +51,8 @@ if for_unicode: def wrap(self, u): - return self.space.newunicode(u) + lgt, flag = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt, flag) else: def wrap(self, s): return self.space.newbytes(s) @@ -59,7 +61,6 @@ def __init__(self, space, template): self.space = space - self.empty = u"" if self.is_unicode else "" self.template = template def build(self, args): @@ -80,10 +81,7 @@ def _build_string(self, start, end, level): space = self.space - if self.is_unicode: - out = rstring.UnicodeBuilder() - else: - out = rstring.StringBuilder() + out = rstring.StringBuilder() if not level: raise oefmt(space.w_ValueError, "Recursion depth exceeded") level -= 1 @@ -344,7 +342,7 @@ w_conversion]) self.parser_list_w.append(w_entry) self.last_end = end + 1 - return self.empty + return "" # w_obj = self._get_argument(name) if conversion is not None: @@ -352,7 +350,7 @@ if recursive: spec = self._build_string(spec_start, end, level) w_rendered = self.space.format(w_obj, self.wrap(spec)) - unwrapper = "unicode_w" if self.is_unicode else "bytes_w" + unwrapper = "utf8_w" if self.is_unicode else "bytes_w" to_interp = getattr(self.space, unwrapper) return to_interp(w_rendered) @@ -379,8 +377,10 @@ def format_method(space, w_string, args, is_unicode): if is_unicode: template = unicode_template_formatter(space, - space.unicode_w(w_string)) - return space.newunicode(template.build(args)) + space.utf8_w(w_string)) + r = template.build(args) + lgt, flag = rutf8.check_utf8(r, True) + return space.newutf8(r, lgt, flag) else: template = str_template_formatter(space, space.bytes_w(w_string)) return space.newbytes(template.build(args)) @@ -416,7 +416,8 @@ if for_unicode: def wrap(self, u): - return self.space.newunicode(u) + lgt, flag = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt, flag) else: def wrap(self, s): return self.space.newbytes(s) @@ -426,7 +427,6 @@ def __init__(self, space, spec): self.space = space - self.empty = u"" if self.is_unicode else "" self.spec = spec def _is_alignment(self, c): @@ -492,8 +492,9 @@ presentation_type = spec[i] if self.is_unicode: try: - the_type = spec[i].encode("ascii")[0] - except UnicodeEncodeError: + rutf8.check_utf8(spec[i], True) + the_type = spec[i][0] + except rutf8.CheckError: raise oefmt(space.w_ValueError, "invalid presentation type") else: @@ -538,8 +539,9 @@ return total def _lit(self, s): + assert len(s) == 1 if self.is_unicode: - return s.decode("latin-1") + return rutf8.unichr_as_utf8(ord(s[0])) else: return s @@ -551,10 +553,7 @@ return builder.build() def _builder(self): - if self.is_unicode: - return rstring.UnicodeBuilder() - else: - return rstring.StringBuilder() + return rstring.StringBuilder() def _unknown_presentation(self, tp): raise oefmt(self.space.w_ValueError, @@ -598,8 +597,8 @@ thousands = "" grouping = "\xFF" # special value to mean 'stop' if self.is_unicode: - self._loc_dec = dec.decode("latin-1") - self._loc_thousands = thousands.decode("latin-1") + self._loc_dec = rutf8.decode_latin_1(dec) + self._loc_thousands = rutf8.decode_latin_1(thousands) else: self._loc_dec = dec self._loc_thousands = thousands @@ -718,7 +717,7 @@ ts = 
self._loc_thousands if need_separator else None self._fill_digits(buf, digits, left, n_chars, n_zeros, ts) buf.reverse() - self._grouped_digits = self.empty.join(buf) + self._grouped_digits = "".join(buf) def _upcase_string(self, s): buf = [] @@ -727,7 +726,7 @@ if ord("a") <= index <= ord("z"): c = chr(index - 32) buf.append(c) - return self.empty.join(buf) + return "".join(buf) def _fill_number(self, spec, num, to_digits, to_prefix, fill_char, @@ -736,10 +735,7 @@ if spec.n_lpadding: out.append_multiple_char(fill_char[0], spec.n_lpadding) if spec.n_sign: - if self.is_unicode: - sign = spec.sign.decode("latin-1") - else: - sign = spec.sign + sign = self._lit(spec.sign) out.append(sign) if spec.n_prefix: pref = num[to_prefix:to_prefix + spec.n_prefix] @@ -783,13 +779,13 @@ raise oefmt(space.w_ValueError, "sign not allowed with 'c' presentation type") value = space.int_w(w_num) - max_char = runicode.MAXUNICODE if self.is_unicode else 0xFF + max_char = 0x10FFFF if self.is_unicode else 0xFF if not (0 <= value <= max_char): raise oefmt(space.w_OverflowError, "%%c arg not in range(%s)", hex(max_char)) if self.is_unicode: - result = runicode.UNICHR(value) + result = rutf8.unichr_as_utf8(value) else: result = chr(value) n_digits = 1 @@ -845,6 +841,7 @@ prefix = "0x" as_str = value.format(LONG_DIGITS[:base], prefix) if self.is_unicode: + XXX return as_str.decode("latin-1") return as_str @@ -852,7 +849,7 @@ if base == 10: s = str(value) if self.is_unicode: - return s.decode("latin-1") + return rutf8.decode_latin_1(s) return s # This part is slow. negative = value < 0 @@ -893,7 +890,7 @@ i -= 1 buf[i] = "-" assert i >= 0 - return self.empty.join(buf[i:]) + return "".join(buf[i:]) def format_int_or_long(self, w_num, kind): space = self.space @@ -975,7 +972,7 @@ have_dec_point, to_remainder = self._parse_number(result, to_number) n_remainder = len(result) - to_remainder if self.is_unicode: - digits = result.decode("latin-1") + digits = rutf8.decode_latin_1(result) else: digits = result spec = self._calc_num_width(0, sign, to_number, n_digits, @@ -1081,8 +1078,8 @@ to_imag_number) if self.is_unicode: - re_num = re_num.decode("latin-1") - im_num = im_num.decode("latin-1") + re_num = rutf8.decode_latin_1(re_num) + im_num = rutf8.decode_latin_1(im_num) #set remainder, in CPython _parse_number sets this #using n_re_digits causes tests to fail @@ -1111,7 +1108,7 @@ self._fill_char = tmp_fill_char #compute L and R padding - stored in self._left_pad and self._right_pad - self._calc_padding(self.empty, re_spec.n_total + im_spec.n_total + 1 + + self._calc_padding("", re_spec.n_total + im_spec.n_total + 1 + add_parens * 2) out = self._builder() @@ -1172,7 +1169,7 @@ @specialize.arg(2) def run_formatter(space, w_format_spec, meth, *args): if space.isinstance_w(w_format_spec, space.w_unicode): - formatter = unicode_formatter(space, space.unicode_w(w_format_spec)) + formatter = unicode_formatter(space, space.utf8_w(w_format_spec)) return getattr(formatter, meth)(*args) else: formatter = str_formatter(space, space.bytes_w(w_format_spec)) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -600,9 +600,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newunicode(u"eins"), 
self.space.newunicode(u"zwei")]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newunicode(u"zwei")]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): @@ -626,7 +626,7 @@ # the same for unicode w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) w_l.getitems = None - assert space.unicode_w(space.call_method(space.wrap(u"c"), "join", w_l)) == u"acb" + assert space.utf8_w(space.call_method(space.wrap(u"c"), "join", w_l)) == "acb" def test_string_join_returns_same_instance(self): space = self.space diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -331,12 +331,11 @@ def descr__format__(self, space, w_format_spec): if not space.isinstance_w(w_format_spec, space.w_unicode): w_format_spec = space.call_function(space.w_unicode, w_format_spec) - spec = space.unicode_w(w_format_spec) + spec = space.utf8_w(w_format_spec) formatter = newformat.unicode_formatter(space, spec) self2 = unicode_from_object(space, self) assert isinstance(self2, W_UnicodeObject) - # XXX - return formatter.format_string(self2._utf8.decode("utf8")) + return formatter.format_string(self2._utf8) def descr_mod(self, space, w_values): return mod_format(space, self, w_values, do_unicode=True) @@ -526,12 +525,12 @@ def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, space.utf8_w(self)) return tformat.formatter_parser() def descr_formatter_field_name_split(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, space.utf8_w(self)) return tformat.formatter_field_name_split() def descr_lower(self, space): @@ -1188,8 +1187,7 @@ rutf8.check_ascii(s) except rutf8.CheckError as a: eh = unicodehelper.encode_error_handler(space) - u_len = w_object._len() - eh(None, "ascii", "ordinal not in range(128)", s, u_len, + eh(None, "ascii", "ordinal not in range(128)", s, a.pos, a.pos + 1) assert False, "always raises" return space.newbytes(s) @@ -1260,7 +1258,7 @@ # test_unicode_conversion_with__str__ if w_unicode_method is None: if space.isinstance_w(w_obj, space.w_unicode): - return space.newunicode(space.unicode_w(w_obj)) + return unicodehelper.convert_arg_to_w_unicode(space, w_obj) w_unicode_method = space.lookup(w_obj, "__str__") if w_unicode_method is not None: w_res = space.get_and_call_function(w_unicode_method, w_obj) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -123,6 +123,13 @@ continuation_bytes += 1 return len(s) - continuation_bytes +def get_flag_from_code(oc): + if oc <= 0x7F: + return FLAG_ASCII + if 0xD800 <= oc <= 0xDFFF: + return FLAG_HAS_SURROGATES + return FLAG_REGULAR + def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! 
""" @@ -651,3 +658,30 @@ return unicode_escape #, char_escape_helper +def decode_latin_1(s): + if len(s) == 0: + return s + if len(s) == 1 and ord(s[0]) <= 0x7F: + return s + try: + check_ascii(s) + return s + except CheckError: + return _decode_latin_1_slowpath(s) + +def _decode_latin_1_slowpath(s): + res = StringBuilder(len(s)) + i = 0 + while i < len(s): + if ord(s[i]) > 0x7F: + while i < len(s) and ord(s[i]) > 0x7F: + unichr_as_utf8_append(res, ord(s[i])) + i += 1 + else: + start = i + end = i + 1 + while end < len(s) and ord(s[end]) <= 0x7F: + end += 1 + res.append_slice(s, start, end) + i = end + return res.build() From pypy.commits at gmail.com Mon Nov 20 20:59:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 17:59:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a138899.968ddf0a.433a7.c954@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93107:3e868c28555c Date: 2017-11-21 01:59 +0000 http://bitbucket.org/pypy/pypy/changeset/3e868c28555c/ Log: hg merge default diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -20,3 +20,9 @@ .. branch: run-extra-tests Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) 
diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -339,17 +368,24 @@ def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +394,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -237,9 +237,9 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -UCHARP = lltype.Ptr(lltype.Array( - rffi.UCHAR, hints={'nolength':True, 'render_as_const':True})) - at cpython_api([UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) +CONST_UCHARP = lltype.Ptr(lltype.Array(rffi.UCHAR, hints={'nolength': True, + 'render_as_const': True})) + at cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): little_endian = rffi.cast(lltype.Signed, little_endian) signed = rffi.cast(lltype.Signed, signed) diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -2271,7 +2271,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -101,8 +101,8 @@ importhook(space, 'sys') assert importlock.count == 0 # A new module - importhook(space, "time") - assert importlock.count == 1 + importhook(space, 're') + assert importlock.count >= 9 # Import it again previous_count = importlock.count importhook(space, "time") diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): 
- return bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -81,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -212,19 +188,47 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -309,21 +313,15 @@ #___ interface for annotator.bookkeeper _______ def recursivecall(self, graph, whence, inputcells): - if isinstance(whence, tuple): + if whence is not None: parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a dictionary of call - # points to this func which triggers a reflow whenever the - # return block of this graph has been analysed. - callpositions = self.notify.setdefault(graph.returnblock, {}) - if whence is not None: - if callable(whence): - def callback(): - whence(self, graph) - else: - callback = whence - callpositions[callback] = True + # self.notify[graph.returnblock] is a set of call + # points to this func which triggers a reflow whenever the + # return block of this graph has been analysed. 
+ returnpositions = self.notify.setdefault(graph.returnblock, set()) + returnpositions.add(whence) # generalize the function's input arguments self.addpendingblock(graph, graph.startblock, inputcells) @@ -416,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) @@ -574,12 +572,8 @@ self.follow_link(graph, link, constraints) if block in self.notify: - # reflow from certain positions when this block is done - for callback in self.notify[block]: - if isinstance(callback, tuple): - self.reflowfromposition(callback) # callback is a position - else: - callback() + for position in self.notify[block]: + self.reflowfromposition(position) def follow_link(self, graph, link, constraints): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -547,10 +547,8 @@ (position_key, "first") and (position_key, "second"). In general, "unique_key" should somehow uniquely identify where - the call is in the source code, and "callback" can be either a - position_key to reflow from when we see more general results, - or a real callback function that will be called with arguments - # "(annotator, called_graph)" whenever the result is generalized. + the call is in the source code, and "callback" is a + position_key to reflow from when we see more general results. "replace" can be set to a list of old unique_key values to forget now, because the given "unique_key" replaces them. diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -1,5 +1,5 @@ from rpython.annotator.model import ( - s_ImpossibleValue, SomeInteger, s_Bool, union) + s_ImpossibleValue, SomeInteger, s_Bool, union, AnnotatorError) from rpython.annotator.listdef import ListItem from rpython.rlib.objectmodel import compute_hash @@ -51,23 +51,19 @@ s_key = self.s_value - def check_eqfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert s_Bool.contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myeq, self.s_rdict_eqfn, [s_key, s_key], replace=replace_othereq) + if not s_Bool.contains(s): + raise AnnotatorError( "the custom eq function of an r_dict must return a boolean" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myeq, self.s_rdict_eqfn, [s_key, s_key], - replace=replace_othereq, - callback = check_eqfn) - def check_hashfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert SomeInteger().contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myhash, self.s_rdict_hashfn, [s_key], replace=replace_otherhash) + if not SomeInteger().contains(s): + raise AnnotatorError( "the custom hash function of an r_dict must return an integer" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myhash, self.s_rdict_hashfn, [s_key], - replace=replace_otherhash, - callback = check_hashfn) class DictValue(ListItem): @@ -93,11 +89,11 @@ self.force_non_null = force_non_null def read_key(self, position_key): - self.dictkey.read_locations[position_key] = True + self.dictkey.read_locations.add(position_key) return self.dictkey.s_value def read_value(self, position_key): - self.dictvalue.read_locations[position_key] = True + self.dictvalue.read_locations.add(position_key) 
return self.dictvalue.s_value def same_as(self, other): diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py --- a/rpython/annotator/listdef.py +++ b/rpython/annotator/listdef.py @@ -30,7 +30,7 @@ self.s_value = s_value self.bookkeeper = bookkeeper self.itemof = {} # set of all ListDefs using this ListItem - self.read_locations = {} + self.read_locations = set() if bookkeeper is None: self.dont_change_any_more = True @@ -95,7 +95,7 @@ self.notify_update() if s_new_value != s_other_value: other.notify_update() - self.read_locations.update(other.read_locations) + self.read_locations |= other.read_locations def patch(self): for listdef in self.itemof: @@ -130,7 +130,7 @@ self.listitem.itemof[self] = True def read_item(self, position_key): - self.listitem.read_locations[position_key] = True + self.listitem.read_locations.add(position_key) return self.listitem.s_value def same_as(self, other): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -2141,28 +2141,6 @@ assert (fdesc.get_s_signatures((2, (), False)) == [([someint,someint],someint)]) - def test_emulated_pbc_call_callback(self): - def f(a,b): - return a + b - from rpython.annotator import annrpython - a = annrpython.RPythonAnnotator() - from rpython.annotator import model as annmodel - - memo = [] - def callb(ann, graph): - memo.append(annmodel.SomeInteger() == ann.binding(graph.getreturnvar())) - - s_f = a.bookkeeper.immutablevalue(f) - s = a.bookkeeper.emulate_pbc_call('f', s_f, [annmodel.SomeInteger(), annmodel.SomeInteger()], - callback=callb) - assert s == annmodel.SomeImpossibleValue() - a.complete() - - assert a.binding(graphof(a, f).getreturnvar()).knowntype == int - assert len(memo) >= 1 - for t in memo: - assert t - def test_iterator_union(self): def it(d): return d.iteritems() diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. 
""" if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX diff --git a/rpython/rlib/rerased.py b/rpython/rlib/rerased.py --- a/rpython/rlib/rerased.py +++ b/rpython/rlib/rerased.py @@ -15,6 +15,8 @@ """ import sys +from collections import defaultdict + from rpython.annotator import model as annmodel from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.llannotation import lltype_to_annotation @@ -48,34 +50,29 @@ def __deepcopy__(self, memo): return self - def _getdict(self, bk): - try: - dict = bk._erasing_pairs_tunnel - except AttributeError: - dict = bk._erasing_pairs_tunnel = {} - return dict +class IdentityDesc(object): + def __init__(self, bookkeeper): + self.bookkeeper = bookkeeper + self.s_input = annmodel.s_ImpossibleValue + self.reflowpositions = {} - def enter_tunnel(self, bookkeeper, s_obj): - dict = self._getdict(bookkeeper) - s_previousobj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - s_obj = annmodel.unionof(s_previousobj, s_obj) - if s_obj != s_previousobj: - dict[self] = (s_obj, reflowpositions) - for position in reflowpositions: - bookkeeper.annotator.reflowfromposition(position) + def enter_tunnel(self, s_obj): + s_obj = annmodel.unionof(self.s_input, s_obj) + if s_obj != self.s_input: + self.s_input = s_obj + for position in self.reflowpositions: + self.bookkeeper.annotator.reflowfromposition(position) - def leave_tunnel(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - reflowpositions[bookkeeper.position_key] = True - return s_obj + def leave_tunnel(self): + self.reflowpositions[self.bookkeeper.position_key] = True + return self.s_input - def get_input_annotation(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, _ = dict[self] - return s_obj +def _get_desc(bk, identity): + try: + descs = bk._erasing_pairs_descs + except AttributeError: + descs = bk._erasing_pairs_descs = defaultdict(lambda: IdentityDesc(bk)) + return descs[identity] _identity_for_ints = ErasingPairIdentity("int") @@ -94,21 +91,23 @@ _about_ = erase def compute_result_annotation(self, s_obj): - identity.enter_tunnel(self.bookkeeper, s_obj) + desc = _get_desc(self.bookkeeper, identity) + desc.enter_tunnel(s_obj) return _some_erased() def specialize_call(self, hop): bk = hop.rtyper.annotator.bookkeeper - s_obj = identity.get_input_annotation(bk) + desc = _get_desc(bk, identity) hop.exception_cannot_occur() - return _rtype_erase(hop, s_obj) + return _rtype_erase(hop, desc.s_input) class Entry(ExtRegistryEntry): _about_ = unerase def compute_result_annotation(self, s_obj): assert _some_erased().contains(s_obj) - return identity.leave_tunnel(self.bookkeeper) + desc = _get_desc(self.bookkeeper, identity) + return desc.leave_tunnel() def specialize_call(self, hop): hop.exception_cannot_occur() @@ -130,6 +129,7 @@ def __init__(self, x, identity): self._x = x 
self._identity = identity + def __repr__(self): return "Erased(%r, %r)" % (self._x, self._identity) @@ -140,7 +140,7 @@ assert config.translation.taggedpointers, "need to enable tagged pointers to use erase_int" return lltype.cast_int_to_ptr(r_self.lowleveltype, value._x * 2 + 1) bk = r_self.rtyper.annotator.bookkeeper - s_obj = value._identity.get_input_annotation(bk) + s_obj = _get_desc(bk, value._identity).s_input r_obj = r_self.rtyper.getrepr(s_obj) if r_obj.lowleveltype is lltype.Void: return lltype.nullptr(r_self.lowleveltype.TO) @@ -182,9 +182,9 @@ _type_ = Erased def compute_annotation(self): - identity = self.instance._identity + desc = _get_desc(self.bookkeeper, self.instance._identity) s_obj = self.bookkeeper.immutablevalue(self.instance._x) - identity.enter_tunnel(self.bookkeeper, s_obj) + desc.enter_tunnel(s_obj) return _some_erased() # annotation and rtyping support diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1881,7 +1881,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return 
self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -2,7 +2,7 @@ from rpython.rtyper.test.test_llinterp import interpret from rpython.rlib.rarithmetic import * from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError -from hypothesis import given, strategies +from hypothesis import given, strategies, assume import sys import py @@ -404,8 +404,11 @@ def test_int_c_div_mod(x, y): assert int_c_div(~x, y) == -(abs(~x) // y) assert int_c_div( x,-y) == -(x // y) - if (x, y) == (sys.maxint, 1): - py.test.skip("would overflow") + + at given(strategies.integers(min_value=0, max_value=sys.maxint), + strategies.integers(min_value=1, max_value=sys.maxint)) +def test_int_c_div_mod_2(x, y): + assume((x, y) != (sys.maxint, 1)) # This case would overflow assert int_c_div(~x,-y) == +(abs(~x) // y) for x1 in [x, ~x]: for y1 in [y, -y]: diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Tue Nov 21 00:32:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 21:32:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Do not use PyUnicode_Check in PyUnicode_AS_UNICODE, ever Message-ID: <5a13ba72.47b0df0a.849f3.fbb9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93108:ecfbd8f62994 Date: 2017-11-21 05:32 +0000 http://bitbucket.org/pypy/pypy/changeset/ecfbd8f62994/ Log: Do not use PyUnicode_Check in PyUnicode_AS_UNICODE, ever This macro may be used to fill in an uninitialised, unrealised unicode object, but PyUnicode_Check realises it, and modifying a PyPy-linked PyUnicodeObject is 
a no-no... diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -61,8 +61,7 @@ use PyUnicode_WRITE() and PyUnicode_READ(). */ #define PyUnicode_AS_UNICODE(op) \ - (assert(PyUnicode_Check(op)), \ - (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ + ((((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ PyUnicode_AsUnicode((PyObject *)(op))) #define PyUnicode_AS_DATA(op) \ diff --git a/pypy/module/cpyext/test/_widechar.c b/pypy/module/cpyext/test/_widechar.c new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/_widechar.c @@ -0,0 +1,47 @@ +// Enable asserts. This used to fail in that case only. +#undef NDEBUG + +#include "Python.h" + +static PyObject * +test_widechar(PyObject *self) +{ + const wchar_t invalid[1] = {(wchar_t)0x110000u}; + PyObject *wide; + + wide = PyUnicode_FromUnicode(NULL, 1); + if (wide == NULL) + return NULL; + PyUnicode_AS_UNICODE(wide)[0] = invalid[0]; + if (_PyUnicode_Ready(wide) < 0) { + return NULL; + } + return wide; +} + +static PyMethodDef TestMethods[] = { + {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static struct PyModuleDef _testcapimodule = { + PyModuleDef_HEAD_INIT, + "_widechar", + NULL, + -1, + TestMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__widechar(void) +{ + PyObject *m; + m = PyModule_Create(&_testcapimodule); + if (m == NULL) + return NULL; + return m; +} diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -356,6 +356,10 @@ print(repr(wide), repr(utf8)) assert wide == utf8 + def test_invalid(self): + m = self.import_module('_widechar') + raises(ValueError, m.test_widechar) + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): From pypy.commits at gmail.com Tue Nov 21 04:30:34 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 01:30:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: interpreter fixes Message-ID: <5a13f23a.aea6df0a.cd033.ca65@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93109:86548802b11b Date: 2017-11-21 10:29 +0100 http://bitbucket.org/pypy/pypy/changeset/86548802b11b/ Log: interpreter fixes diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -231,11 +231,14 @@ return s[pt:ps] def decode_utf8_recode(space, s, ps, end, recode_encoding): - lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, end) - w_v = unicodehelper.encode(space, space.newutf8(s[ps:end], lgt, flag), + p = ps + while p < end and ord(s[p]) & 0x80: + p += 1 + lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt, flag), recode_encoding) v = space.bytes_w(w_v) - return v, ps + return v, p def raise_app_valueerror(space, msg): raise OperationError(space.w_ValueError, space.newtext(msg)) diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py --- a/pypy/interpreter/pyparser/test/test_parsestring.py +++ b/pypy/interpreter/pyparser/test/test_parsestring.py @@ -10,7 +10,7 @@ assert space.str_w(w_ret) == value 
elif isinstance(value, unicode): assert space.type(w_ret) == space.w_unicode - assert space.unicode_w(w_ret) == value + assert space.utf8_w(w_ret).decode('utf8') == value else: assert False @@ -102,7 +102,4 @@ def test_decode_unicode_utf8(self): buf = parsestring.decode_unicode_utf8(self.space, 'u"\xf0\x9f\x92\x8b"', 2, 6) - if sys.maxunicode == 65535: - assert buf == r"\U0000d83d\U0000dc8b" - else: - assert buf == r"\U0001f48b" + assert buf == r"\U0001f48b" diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py --- a/pypy/interpreter/test/test_objspace.py +++ b/pypy/interpreter/test/test_objspace.py @@ -216,9 +216,7 @@ space = self.space w = space.wrap assert space.text0_w(w("123")) == "123" - exc = space.raises_w(space.w_TypeError, space.text0_w, w("123\x004")) - assert space.unicode0_w(w(u"123")) == u"123" - exc = space.raises_w(space.w_TypeError, space.unicode0_w, w(u"123\x004")) + space.raises_w(space.w_TypeError, space.text0_w, w("123\x004")) def test_getindex_w(self): w_instance1 = self.space.appexec([], """(): diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -351,12 +351,12 @@ try: chr = r_uint(int(s[pos:pos+digits], 16)) except ValueError: - aaaa endinpos = pos while s[endinpos] in hexdigits: endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) + size, flag = rutf8.check_utf8(res, True) builder.append(res) else: # when we get here, chr is a 32-bit unicode character @@ -1392,7 +1392,7 @@ while pos < size: ch = s[pos] - c = mapping.get(ch, ERROR_CHAR) + c = mapping.get(ord(ch), ERROR_CHAR) if c == ERROR_CHAR: r, pos = errorhandler(errors, "charmap", "character maps to ", @@ -1407,20 +1407,17 @@ def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): - YYY + size = len(s) if mapping is None: - return unicode_encode_latin_1(s, size, errors, - errorhandler=errorhandler) - - if errorhandler is None: - errorhandler = default_unicode_error_encode + return utf8_encode_latin_1(s, size, errors, + errorhandler=errorhandler) if size == 0: return '' result = StringBuilder(size) pos = 0 while pos < size: - ch = s[pos] + ch = rutf8.codepoint_at_pos(s, pos) c = mapping.get(ch, '') if len(c) == 0: @@ -1428,9 +1425,10 @@ collend = pos + 1 while collend < size and mapping.get(s[collend], '') == '': collend += 1 - ru, rs, pos = errorhandler(errors, "charmap", - "character maps to ", - s, pos, collend) + rs, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, collend) + XXXX if rs is not None: # py3k only result.append(rs) @@ -1445,6 +1443,6 @@ result.append(c2) continue result.append(c) - pos += 1 + pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -551,10 +551,10 @@ # get the character from the mapping if self.mapping_w is not None: - w_ch = self.mapping_w[ord(ch)] + w_ch = self.mapping_w[ch] else: try: - w_ch = space.getitem(self.w_mapping, space.newint(ord(ch))) + w_ch = space.getitem(self.w_mapping, space.newint(ch)) except OperationError as e: if not e.match(space, space.w_LookupError): raise @@ -587,7 +587,7 @@ # get the character from the mapping try: - w_ch = space.getitem(self.w_mapping, space.newint(ord(ch))) + w_ch = space.getitem(self.w_mapping, space.newint(ch)) except OperationError as 
e: if not e.match(space, space.w_LookupError): raise @@ -633,8 +633,8 @@ return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) - at unwrap_spec(utf8='utf8', errors='text_or_none') -def charmap_encode(space, utf8, errors="strict", w_mapping=None): + at unwrap_spec(errors='text_or_none') +def charmap_encode(space, w_unicode, errors="strict", w_mapping=None): from pypy.interpreter import unicodehelper if errors is None: @@ -645,9 +645,10 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - result = unicodehelper.unicode_encode_charmap( - utf8, errors, state.encode_error_handler, mapping) - return space.newtuple([space.newbytes(result), space.newint(len(uni))]) + w_uni = unicodehelper.convert_arg_to_w_unicode(space, w_unicode) + result = unicodehelper.utf8_encode_charmap( + space.utf8_w(w_uni), errors, state.encode_error_handler, mapping) + return space.newtuple([space.newbytes(result), space.newint(w_uni._len())]) @unwrap_spec(chars='utf8') From pypy.commits at gmail.com Tue Nov 21 08:03:46 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 05:03:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix all the tests in codecs until test_ztranslation Message-ID: <5a142432.0b0f1c0a.ac5e3.472b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93110:c7109cb7f6be Date: 2017-11-21 14:03 +0100 http://bitbucket.org/pypy/pypy/changeset/c7109cb7f6be/ Log: fix all the tests in codecs until test_ztranslation diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -173,8 +173,13 @@ r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - res.append(r) for j in range(pos - cur): + c = rutf8.codepoint_at_pos(r, j) + if c > 0xFF: + errorhandler("strict", 'latin1', + 'ordinal not in range(256)', s, + cur, cur + 1) + res.append(chr(c)) i = rutf8.next_codepoint_pos(s, i) cur = pos cur += 1 @@ -200,7 +205,12 @@ msg = "ordinal not in range(128)" r, newpos = errorhandler(errors, 'ascii', msg, utf8, pos, endpos) - for _ in range(newpos - pos): + for j in range(newpos - pos): + c = rutf8.codepoint_at_pos(r, j) + if c > 0x7F: + errorhandler("strict", 'ascii', + 'ordinal not in range(128)', utf8, + pos, pos + 1) i = rutf8.next_codepoint_pos(utf8, i) pos = newpos res.append(r) @@ -364,7 +374,7 @@ message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) - size, flag = rutf8.check_utf8(res) + size, flag = rutf8.check_utf8(res, True) builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) @@ -778,21 +788,25 @@ if base64bits > 0: # left-over bits if base64bits >= 6: # We've seen at least one base-64 character - aaa pos += 1 msg = "partial character in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) continue else: # Some bits remain; they should be zero if base64buffer != 0: - bbb pos += 1 msg = "non-zero padding bits in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) continue @@ -826,11 +840,13 @@ outsize += 1 pos += 1 else: - yyy startinpos = pos pos += 1 msg = "unexpected special character" res, pos = 
errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) # end of string @@ -973,7 +989,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, rutf8.FLAG_ASCII, bo if bo == -1: # force little endian ihi = 1 @@ -1182,7 +1198,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, rutf8.FLAG_ASCII, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -1409,40 +1425,43 @@ mapping=None): size = len(s) if mapping is None: - return utf8_encode_latin_1(s, size, errors, - errorhandler=errorhandler) + return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) if size == 0: return '' result = StringBuilder(size) pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) c = mapping.get(ch, '') if len(c) == 0: # collect all unencodable chars. Important for narrow builds. - collend = pos + 1 - while collend < size and mapping.get(s[collend], '') == '': - collend += 1 - rs, pos = errorhandler(errors, "charmap", + collend = rutf8.next_codepoint_pos(s, pos) + endindex = index + 1 + while collend < size and mapping.get(rutf8.codepoint_at_pos(s, collend), '') == '': + collend = rutf8.next_codepoint_pos(s, collend) + endindex += 1 + rs, endindex = errorhandler(errors, "charmap", "character maps to ", - s, pos, collend) - XXXX - if rs is not None: - # py3k only - result.append(rs) - continue - for ch2 in ru: - c2 = mapping.get(ch2, '') - if len(c2) == 0: + s, index, endindex) + j = 0 + for _ in range(endindex - index): + ch2 = rutf8.codepoint_at_pos(rs, j) + ch2 = mapping.get(ch2, '') + if not ch2: errorhandler( "strict", "charmap", "character maps to ", - s, pos, pos + 1) - result.append(c2) + s, index, index + 1) + result.append(ch2) + index += 1 + j = rutf8.next_codepoint_pos(rs, j) + pos = rutf8.next_codepoint_pos(s, pos) continue result.append(c) + index += 1 pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,6 +1,6 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import we_are_translated, not_rpython -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import StringBuilder from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -241,33 +241,42 @@ "don't know how to handle %T in error callback", w_exc) def backslashreplace_errors(space, w_exc): + from pypy.interpreter import unicodehelper + check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + space.realutf8_w(w_obj) # for errors + w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) - builder = UnicodeBuilder() + start = w_obj._index_to_byte(start) + end = w_obj._index_to_byte(end) + builder = StringBuilder() + obj = w_obj._utf8 pos = start while pos < end: - oc = ord(obj[pos]) + oc = rutf8.codepoint_at_pos(obj, pos) num = hex(oc) if (oc >= 0x10000): - builder.append(u"\\U") + builder.append("\\U") zeros = 8 elif (oc >= 0x100): - builder.append(u"\\u") + 
builder.append("\\u") zeros = 4 else: - builder.append(u"\\x") + builder.append("\\x") zeros = 2 lnum = len(num) nb = zeros + 2 - lnum # num starts with '0x' if nb > 0: - builder.append_multiple_char(u'0', nb) - builder.append_slice(unicode(num), 2, lnum) - pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + builder.append_multiple_char('0', nb) + builder.append_slice(num, 2, lnum) + pos = rutf8.next_codepoint_pos(obj, pos) + r = builder.build() + lgt, flag = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt, flag), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -489,7 +498,7 @@ @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter.unicodehelper import str_decode_utf_16_helper if errors is None: errors = 'strict' @@ -504,16 +513,17 @@ consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = runicode.str_decode_utf_16_helper( - data, len(data), errors, final, - DecodeWrapper(state.decode_error_handler).handle, byteorder) - return space.newtuple([space.newunicode(res), space.newint(consumed), + res, consumed, lgt, flag, byteorder = str_decode_utf_16_helper( + data, errors, final, + state.decode_error_handler, byteorder) + return space.newtuple([space.newutf8(res, lgt, flag), + space.newint(consumed), space.newint(byteorder)]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter.unicodehelper import str_decode_utf_32_helper final = space.is_true(w_final) state = space.fromcache(CodecState) @@ -526,10 +536,11 @@ consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = runicode.str_decode_utf_32_helper( - data, len(data), errors, final, - DecodeWrapper(state.decode_error_handler).handle, byteorder) - return space.newtuple([space.newunicode(res), space.newint(consumed), + res, consumed, lgt, flag, byteorder = str_decode_utf_32_helper( + data, errors, final, + state.decode_error_handler, byteorder) + return space.newtuple([space.newutf8(res, lgt, flag), + space.newint(consumed), space.newint(byteorder)]) # ____________________________________________________________ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -592,11 +592,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build From pypy.commits at gmail.com Tue Nov 21 08:08:03 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 05:08:03 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: simple fixes in fake objspace Message-ID: <5a142533.43aadf0a.d1e02.1f1b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: 
r93111:df28f6398687 Date: 2017-11-21 14:07 +0100 http://bitbucket.org/pypy/pypy/changeset/df28f6398687/ Log: simple fixes in fake objspace diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -2143,7 +2143,7 @@ 'float_w', 'uint_w', 'bigint_w', - 'unicode_w', + 'utf8_w', 'unwrap', 'is_true', 'is_w', diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -209,7 +209,7 @@ def newbytes(self, x): return w_some_obj() - def newutf8(self, x, l): + def newutf8(self, x, l, f): return w_some_obj() newtext = newbytes From pypy.commits at gmail.com Tue Nov 21 09:20:39 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 06:20:39 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: some improvements for xmlcharrefreplace Message-ID: <5a143637.88c5df0a.6bb8.26e3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93112:fd1b64ce9b80 Date: 2017-11-21 15:19 +0100 http://bitbucket.org/pypy/pypy/changeset/fd1b64ce9b80/ Log: some improvements for xmlcharrefreplace diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -164,26 +164,31 @@ while i < size: if ord(s[i]) <= 0x7F: res.append(s[i]) + i += 1 + cur += 1 else: oc = rutf8.codepoint_at_pos(s, i) if oc <= 0xFF: res.append(chr(oc)) - i += 1 + cur += 1 + i = rutf8.next_codepoint_pos(s, i) else: r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) for j in range(pos - cur): + i = rutf8.next_codepoint_pos(s, i) + + j = 0 + while j < len(r): c = rutf8.codepoint_at_pos(r, j) if c > 0xFF: errorhandler("strict", 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) + j = rutf8.next_codepoint_pos(r, j) res.append(chr(c)) - i = rutf8.next_codepoint_pos(s, i) cur = pos - cur += 1 - i += 1 r = res.build() return r diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -215,27 +215,30 @@ "don't know how to handle %T in error callback", w_exc) def xmlcharrefreplace_errors(space, w_exc): + from pypy.interpreter import unicodehelper + check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + space.realutf8_w(w_obj) # weeoes + w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) - builder = UnicodeBuilder() + start = w_obj._index_to_byte(start) + end = w_obj._index_to_byte(end) + builder = StringBuilder() pos = start + obj = w_obj._utf8 while pos < end: - code = ord(obj[pos]) - if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF and - pos + 1 < end and 0xDC00 <= ord(obj[pos+1]) <= 0xDFFF): - code = (code & 0x03FF) << 10 - code |= ord(obj[pos+1]) & 0x03FF - code += 0x10000 - pos += 1 - builder.append(u"&#") - builder.append(unicode(str(code))) - builder.append(u";") - pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + code = rutf8.codepoint_at_pos(obj, pos) + builder.append("&#") + builder.append(str(code)) + builder.append(";") + pos = 
rutf8.next_codepoint_pos(obj, pos) + r = builder.build() + lgt, flag = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt, flag), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -750,3 +750,9 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == 'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == 'ሴ€⍅y«' From pypy.commits at gmail.com Tue Nov 21 09:52:41 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 06:52:41 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: some rpython fixes Message-ID: <5a143db9.968ddf0a.433a7.990f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93113:5ffbd0a736d9 Date: 2017-11-21 15:51 +0100 http://bitbucket.org/pypy/pypy/changeset/5ffbd0a736d9/ Log: some rpython fixes diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -211,12 +211,16 @@ r, newpos = errorhandler(errors, 'ascii', msg, utf8, pos, endpos) for j in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + + j = 0 + while j < len(r): c = rutf8.codepoint_at_pos(r, j) if c > 0x7F: errorhandler("strict", 'ascii', 'ordinal not in range(128)', utf8, - pos, pos + 1) - i = rutf8.next_codepoint_pos(utf8, i) + pos, pos + 1) + j = rutf8.next_codepoint_pos(r, j) pos = newpos res.append(r) else: @@ -382,8 +386,8 @@ size, flag = rutf8.check_utf8(res, True) builder.append(res) else: - rutf8.unichr_as_utf8_append(builder, chr, True) - flag = rutf8.get_flag_from_code(chr) + rutf8.unichr_as_utf8_append(builder, intmask(chr), True) + flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 @@ -755,27 +759,31 @@ if inShift: # in a base-64 section if _utf7_IS_BASE64(ord(ch)): #consume a base-64 character base64buffer = (base64buffer << 6) | _utf7_FROM_BASE64(ch) + assert base64buffer >= 0 base64bits += 6 pos += 1 if base64bits >= 16: # enough bits for a UTF-16 value outCh = base64buffer >> (base64bits - 16) + assert outCh >= 0 base64bits -= 16 base64buffer &= (1 << base64bits) - 1 # clear high bits assert outCh <= 0xffff if surrogate: # expecting a second surrogate if outCh >= 0xDC00 and outCh <= 0xDFFF: - xxxx - result.append( - UNICHR((((surrogate & 0x3FF)<<10) | - (outCh & 0x3FF)) + 0x10000)) + code = (((surrogate & 0x3FF)<<10) | + (outCh & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, code) + outsize += 1 + flag = combine_flags(flag, rutf8.FLAG_REGULAR) surrogate = 0 continue else: - YYYY - result.append(unichr(surrogate)) + rutf8.unichr_as_utf8_append(result, surrogate) + flag = rutf8.FLAG_HAS_SURROGATES + outsize += 1 surrogate = 0 # Not done with outCh: falls back to next line if outCh >= 0xD800 and outCh <= 0xDBFF: @@ -784,6 +792,7 @@ else: flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) outsize += 1 + assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) else: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py 
@@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rlib.rarithmetic import r_uint from rpython.rlib.unicodedata import unicodedb from rpython.rtyper.lltypesystem import lltype, rffi @@ -27,6 +27,7 @@ def unichr_as_utf8(code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string """ + assert code >= 0 code = r_uint(code) if code <= r_uint(0x7F): # Encode ASCII From pypy.commits at gmail.com Tue Nov 21 11:19:51 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 08:19:51 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: general progress Message-ID: <5a145227.48d31c0a.f36c0.9cbc@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93114:cefc9ed0b4c5 Date: 2017-11-21 17:19 +0100 http://bitbucket.org/pypy/pypy/changeset/cefc9ed0b4c5/ Log: general progress diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -74,8 +74,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - utf, (lgt, flag) = unicodehelper.decode_utf8(space, substr) - w_u = space.newutf8(utf, lgt, flag) + lgt, flag = unicodehelper.check_utf8_or_raise(space, substr) + w_u = space.newutf8(substr, lgt, flag) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1094,9 +1094,9 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) - pos = rutf8.next_codepoint_pos(s, pos) if ch < 0xD800: _STORECHAR(result, ch, byteorder) @@ -1106,27 +1106,27 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, pos = errorhandler(errors, public_encoding_name, + ru, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - xxx - #if rs is not None: - # # py3k only - # if len(rs) % 2 != 0: - # errorhandler('strict', public_encoding_name, - # 'surrogates not allowed', - # s, pos-1, pos) - # result.append(rs) - # continue - for ch in ru: + for j in range(newindex - index): + pos = rutf8.next_codepoint_pos(s, pos) + j = 0 + while j < len(ru): + ch = rutf8.codepoint_at_pos(ru, j) if ord(ch) < 0xD800: _STORECHAR(result, ord(ch), byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) + j = rutf8.next_codepoint_pos(ru, j) + index = newindex continue + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + return result.build() def utf8_encode_utf_16(s, errors, @@ -1285,32 +1285,30 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) - ch2 = 0 if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, pos = errorhandler(errors, public_encoding_name, + ru, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - XXX - if rs is not None: - # py3k only - if len(rs) % 4 != 0: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - result.append(rs) - continue - for ch in ru: + for j in range(newindex - index): + pos = rutf8.next_codepoint_pos(s, pos) + j = 0 + while j < len(ru): + 
ch = rutf8.codepoint_at_pos(ru, j) if ord(ch) < 0xD800: _STORECHAR32(result, ord(ch), byteorder) else: errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + 'surrogates not allowed', + s, pos-1, pos) + j = rutf8.next_codepoint_pos(ru, j) + index = newindex continue _STORECHAR32(result, ch, byteorder) + index += 1 return result.build() diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -2,8 +2,9 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.translator import cdir +from rpython.rlib import rutf8 -UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD' +UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'.encode("utf8") class EncodeDecodeError(Exception): @@ -126,7 +127,7 @@ errorcb, namecb, stringdata) src = pypy_cjk_dec_outbuf(decodebuf) length = pypy_cjk_dec_outlen(decodebuf) - return rffi.wcharpsize2unicode(src, length) + return rffi.wcharpsize2utf8(src, length) def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata): @@ -148,7 +149,7 @@ if errors == "strict": raise EncodeDecodeError(start, end, reason) elif errors == "ignore": - replace = u"" + replace = "" elif errors == "replace": replace = UNICODE_REPLACEMENT_CHARACTER else: @@ -156,8 +157,12 @@ replace, end = errorcb(errors, namecb, reason, stringdata, start, end) # 'replace' is RPython unicode here - with rffi.scoped_nonmoving_unicodebuffer(replace) as inbuf: - r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end) + lgt, _ = rutf8.check_utf8(replace, True) + inbuf = rffi.utf82wcharp(replace, lgt) + try: + r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end) + finally: + lltype.free(inbuf, flavor='raw') if r == MBERR_NOMEMORY: raise MemoryError @@ -256,6 +261,7 @@ replace = "?" 
else: assert errorcb + XXX retu, rets, end = errorcb(errors, namecb, reason, unicodedata.encode("utf8"), start, end) if rets is not None: diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.typedef import TypeDef @@ -18,13 +21,14 @@ state = space.fromcache(CodecState) # try: - u_output = c_codecs.decode(self.codec, input, errors, + utf8_output = c_codecs.decode(self.codec, input, errors, state.decode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: raise wrap_runtimeerror(space) - return space.newtuple([space.newunicode(u_output), + lgt, flag = rutf8.check_utf8(utf8_output, True) + return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) @unwrap_spec(input='utf8', errors="text_or_none") @@ -74,7 +78,7 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): - flag = 13 + _, flag = rutf8.check_utf8(input, True) raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -841,8 +841,7 @@ prefix = "0x" as_str = value.format(LONG_DIGITS[:base], prefix) if self.is_unicode: - XXX - return as_str.decode("latin-1") + return rutf8.decode_latin_1(as_str) return as_str def _int_to_base(self, base, value): diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1009,6 +1009,29 @@ wcharp2unicoden, wcharpsize2unicode, unicode2wchararray, unicode2rawmem, ) = make_string_mappings(unicode) +def wcharpsize2utf8(w, size): + """ Helper to convert WCHARP pointer to utf8 in one go. 
+ Equivalent to wcharpsize2unicode().encode("utf8") + """ + from rpython.rlib import rutf8 + + s = StringBuilder(size) + for i in range(size): + rutf8.unichr_as_utf8_append(s, ord(w[i])) + return s.build() + +def utf82wcharp(utf8, utf8len): + from rpython.rlib import rutf8 + + w = lltype.malloc(CWCHARP.TO, utf8len, flavor='raw') + i = 0 + index = 0 + while i < len(utf8): + w[index] = unichr(rutf8.codepoint_at_pos(utf8, i)) + i = rutf8.next_codepoint_pos(utf8, i) + index += 1 + return w + # char** CCHARPP = lltype.Ptr(lltype.Array(CCHARP, hints={'nolength': True})) diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -590,6 +590,14 @@ res = fn(expected_extra_mallocs=range(30)) assert res == 32 * len(d) + def test_wcharp_to_utf8(self): + wchar = lltype.malloc(CWCHARP.TO, 3, flavor='raw') + wchar[0] = u'\u1234' + wchar[1] = u'\x80' + wchar[2] = u'a' + assert wcharpsize2utf8(wchar, 3).decode("utf8") == u'\u1234\x80a' + lltype.free(wchar, flavor='raw') + class TestRffiInternals: def test_struct_create(self): X = CStruct('xx', ('one', INT)) From pypy.commits at gmail.com Tue Nov 21 12:02:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 09:02:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Advance self.pos also when reading only from self.buffer Message-ID: <5a145c1b.92831c0a.46c3b.722f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93115:36daba4180a3 Date: 2017-11-21 17:02 +0000 http://bitbucket.org/pypy/pypy/changeset/36daba4180a3/ Log: Advance self.pos also when reading only from self.buffer diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -890,6 +890,7 @@ have = self._readahead() if have >= length: rwbuffer.setslice(0, self.buffer[self.pos:self.pos + length]) + self.pos += length return space.newint(length) written = 0 if have > 0: diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -214,6 +214,15 @@ assert n == 1 assert buf[:n] == b'c' + bufio = _io.BufferedReader(MockIO(), buffer_size=20) + buf = bytearray(2) + bufio.peek(3) + assert bufio.readinto1(buf) == 2 + assert buf == b'ab' + n = bufio.readinto1(buf) + assert n == 1 + assert buf[:n] == b'c' + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Tue Nov 21 12:26:17 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 09:26:17 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix checking for unichr range Message-ID: <5a1461b9.11c6df0a.f8192.b810@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93116:0021cc161b99 Date: 2017-11-21 18:25 +0100 http://bitbucket.org/pypy/pypy/changeset/0021cc161b99/ Log: fix checking for unichr range diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -24,16 +24,15 @@ @unwrap_spec(code=int) def unichr(space, code): "Return a Unicode string of one character with the given ordinal." 
- try: - s = rutf8.unichr_as_utf8(code, allow_surrogates=True) - except ValueError: - raise oefmt(space.w_ValueError, "unichr() arg out of range") - if code < 0x80: + if code < 0 or code > 0x10FFFF: + raise oefmt(space.w_ValueError, "unichr() arg out of range") + elif code < 0x80: flag = rutf8.FLAG_ASCII elif 0xD800 <= code <= 0xDFFF: flag = rutf8.FLAG_HAS_SURROGATES else: flag = rutf8.FLAG_REGULAR + s = rutf8.unichr_as_utf8(code, allow_surrogates=True) return space.newutf8(s, 1, flag) def len(space, w_obj): From pypy.commits at gmail.com Tue Nov 21 13:38:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 10:38:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix traceback.print_exception() when exc.offset == 0 Message-ID: <5a14729b.c78c1c0a.ebbca.df2d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93117:e37a09d8450a Date: 2017-11-21 18:38 +0000 http://bitbucket.org/pypy/pypy/changeset/e37a09d8450a/ Log: Fix traceback.print_exception() when exc.offset == 0 diff --git a/lib-python/3/traceback.py b/lib-python/3/traceback.py --- a/lib-python/3/traceback.py +++ b/lib-python/3/traceback.py @@ -544,8 +544,8 @@ yield ' {}\n'.format(badline.strip()) if offset is not None: caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 - caretspace = caretspace[:offset].lstrip() + # bug in CPython: the case offset==0 is mishandled + caretspace = caretspace[:offset].lstrip()[:-1] # non-space whitespace (likes tabs) must be kept for alignment caretspace = ((c.isspace() and c or ' ') for c in caretspace) yield ' {}^\n'.format(''.join(caretspace)) From pypy.commits at gmail.com Tue Nov 21 14:42:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 11:42:47 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Force recompilation of _testcapimodule.c (due to ecfbd8f62994) Message-ID: <5a1481b7.0ea6df0a.3586e.5cf6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93118:382fb81ffff8 Date: 2017-11-21 19:42 +0000 http://bitbucket.org/pypy/pypy/changeset/382fb81ffff8/ Log: Force recompilation of _testcapimodule.c (due to ecfbd8f62994) diff --git a/lib_pypy/_pypy_testcapi.py b/lib_pypy/_pypy_testcapi.py --- a/lib_pypy/_pypy_testcapi.py +++ b/lib_pypy/_pypy_testcapi.py @@ -8,7 +8,8 @@ content = fid.read() # from cffi's Verifier() key = '\x00'.join([sys.version[:3], content]) - key += 'cpyext-gc-support-2' # this branch requires recompilation! + # change the key to force recompilation + key += '2017-11-21' if sys.version_info >= (3,): key = key.encode('utf-8') k1 = hex(binascii.crc32(key[0::2]) & 0xffffffff) From pypy.commits at gmail.com Tue Nov 21 15:09:40 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 12:09:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack at cffi Message-ID: <5a148804.d58bdf0a.8cc33.0be7@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93119:c6537b6d453f Date: 2017-11-21 21:09 +0100 http://bitbucket.org/pypy/pypy/changeset/c6537b6d453f/ Log: whack at cffi diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -7,3 +7,5 @@ * better flag handling in split/splitlines maybe? * encode_error_handler has XXX * remove assertions from W_UnicodeObject.__init__ if all the builders pass +* what to do with error handlers that go backwards. 
There were tests + in test_codecs that would check for that diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1773,6 +1773,13 @@ "characters") return rstring.assert_str0(result) + def convert_arg_to_w_unicode(self, w_obj, strict=None): + # XXX why convert_to_w_unicode does something slightly different? + from pypy.objspace.std.unicodeobject import W_UnicodeObject + assert not hasattr(self, 'is_fake_objspace') + return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + + def realutf8_w(self, w_obj): # Like utf8_w(), but only works if w_obj is really of type # 'unicode'. On Python 3 this is the same as utf8_w(). diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -35,9 +35,7 @@ return raise_unicode_exception_encode def convert_arg_to_w_unicode(space, w_arg, strict=None): - from pypy.objspace.std.unicodeobject import W_UnicodeObject - assert not hasattr(space, 'is_fake_objspace') - return W_UnicodeObject.convert_arg_to_w_unicode(space, w_arg, strict) + return space.convert_arg_to_w_unicode(w_arg) # ____________________________________________________________ diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -63,11 +63,14 @@ return (w_value, len(s) + 1) elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper - u = space.unicode_w(w_value) - if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(u) + w_u = space.convert_arg_to_w_unicode(w_value) + if self.citem.size == 4: + length = w_u._len() else: - length = wchar_helper.unicode_size_as_char32(u) + if not w_u._has_surrogates(): + length = w_u._len() + else: + length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -5,7 +5,7 @@ import sys from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rfficache @@ -40,14 +40,15 @@ return ord(s[0]) def cast_unicode(self, w_ob): + import pdb + pdb.set_trace() space = self.space - s = space.unicode_w(w_ob) - try: - ordinal = wchar_helper.unicode_to_ordinal(s) - except ValueError: + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", - len(s), self.name) + w_u._len(), self.name) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) return intmask(ordinal) def cast(self, w_ob): @@ -175,8 +176,10 @@ def convert_to_object(self, cdata): if self.is_signed_wchar: - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.newunicode(unichardata[0]) + code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) + return self.space.newutf8( + rutf8.unichr_as_utf8(code), 1, + rutf8.get_flag_from_code(code)) else: value = misc.read_raw_ulong_data(cdata, self.size) # r_uint try: @@ -185,7 +188,8 @@ raise oefmt(self.space.w_ValueError, 
"char32_t out of range for " "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, + rutf8.get_flag_from_code(ord(u))) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -196,16 +200,7 @@ # returns a r_uint. If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - u = space.unicode_w(w_ob) - try: - ordinal = wchar_helper.unicode_to_ordinal(u) - except ValueError: - pass - else: - if self.size == 2 and ordinal > 0xffff: - raise self._convert_error("single character <= 0xFFFF", - w_ob) - return ordinal + return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -91,11 +91,15 @@ from pypy.module._cffi_backend import wchar_helper if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) - s = space.unicode_w(w_ob) - if self.ctitem.size == 2: - n = wchar_helper.unicode_size_as_char16(s) + w_u = space.convert_arg_to_w_unicode(w_ob) + if self.size == 4: + n = w_u._len() else: - n = wchar_helper.unicode_size_as_char32(s) + if not w_u._has_surrogates(): + n = w_u._len() + else: + n = wchar_helper.unicode_size_as_char16(w_u._utf8, + w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " @@ -328,11 +332,12 @@ length = len(s) + 1 elif space.isinstance_w(w_init, space.w_unicode): from pypy.module._cffi_backend import wchar_helper - u = space.unicode_w(w_init) + w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(u) + length = wchar_helper.unicode_size_as_char16(w_u._utf8, + w_u._len()) else: - length = wchar_helper.unicode_size_as_char32(u) + length = w_u._len() length += 1 elif self.is_file: result = self.prepare_file(w_init) diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -1,10 +1,12 @@ +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize +from rpython.rlib.rstring import StringBuilder from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask from rpython.rtyper.annlowlevel import llunicode from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = rffi.sizeof(lltype.UniChar) +SIZE_UNICODE = 4 if SIZE_UNICODE == 4: @@ -48,7 +50,7 @@ self.ordinal = ordinal def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length) + return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) if SIZE_UNICODE == 2: @@ -86,7 +88,7 @@ def unicode_from_char16(ptr, length): # 'ptr' is a pointer to 'length' 16-bit integers ptr = rffi.cast(rffi.USHORTP, ptr) - u = [u'\x00'] * length + u = StringBuilder(length) i = 0 j = 0 while j < length: @@ -97,10 +99,9 @@ if 0xDC00 <= ch2 <= 0xDFFF: ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[i] = unichr(ch) + rutf8.unichr_as_utf8_append(u, ch) i += 1 - del u[i:] - return u''.join(u) + return u.build() 
@specialize.ll() @@ -121,23 +122,16 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u): - result = len(u) - if SIZE_UNICODE == 4: - for i in range(result): - if ord(u[i]) > 0xFFFF: - result += 1 +def unicode_size_as_char16(u, len): + result = len + i = 0 + while i < len(u): + code = rutf8.codepoint_at_pos(u, i) + if code > 0xFFFF: + result += 1 + i = rutf8.next_codepoint_pos(u, i) return result -def unicode_size_as_char32(u): - result = len(u) - if SIZE_UNICODE == 2 and result > 1: - for i in range(result - 1): - if is_surrogate(u, i): - result -= 1 - return result - - def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): # 'target_ptr' is a raw pointer to 'target_length' wchars; # we assume here that target_length == len(u). From pypy.commits at gmail.com Tue Nov 21 16:49:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 13:49:59 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Adapt idlelib.CallTips for pypy and update some docstrings Message-ID: <5a149f87.ceb1df0a.8f6ae.257a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93120:83b96bb9cf44 Date: 2017-11-21 21:49 +0000 http://bitbucket.org/pypy/pypy/changeset/83b96bb9cf44/ Log: Adapt idlelib.CallTips for pypy and update some docstrings diff --git a/lib-python/3/idlelib/CallTips.py b/lib-python/3/idlelib/CallTips.py --- a/lib-python/3/idlelib/CallTips.py +++ b/lib-python/3/idlelib/CallTips.py @@ -123,6 +123,15 @@ _first_param = re.compile('(?<=\()\w*\,?\s*') _default_callable_argspec = "See source or doc" +def _is_user_method(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.MethodType) and + isinstance(ob.__code__, types.CodeType)) + +def _is_user_function(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.FunctionType) and + isinstance(ob.__code__, types.CodeType)) def get_argspec(ob): '''Return a string describing the signature of a callable object, or ''. 
@@ -140,21 +149,21 @@ return argspec if isinstance(ob, type): fob = ob.__init__ - elif isinstance(ob_call, types.MethodType): + elif _is_user_method(ob_call): fob = ob_call else: fob = ob if (isinstance(fob, (types.FunctionType, types.MethodType)) and hasattr(fob.__code__, 'co_code')): # PyPy: not on argspec = inspect.formatargspec(*inspect.getfullargspec(fob)) - if (isinstance(ob, (type, types.MethodType)) or - isinstance(ob_call, types.MethodType)): + if (_is_user_method(ob) or _is_user_method(ob_call) or + (isinstance(ob, type) and _is_user_function(fob))): argspec = _first_param.sub("", argspec) lines = (textwrap.wrap(argspec, _MAX_COLS, subsequent_indent=_INDENT) if len(argspec) > _MAX_COLS else [argspec] if argspec else []) - if isinstance(ob_call, types.MethodType): + if _is_user_method(ob_call): doc = ob_call.__doc__ else: doc = getattr(ob, "__doc__", "") diff --git a/lib-python/3/idlelib/idle_test/test_calltips.py b/lib-python/3/idlelib/idle_test/test_calltips.py --- a/lib-python/3/idlelib/idle_test/test_calltips.py +++ b/lib-python/3/idlelib/idle_test/test_calltips.py @@ -63,7 +63,7 @@ gtest([].append, append_doc) gtest(List.append, append_doc) - gtest(types.MethodType, "method(function, instance)") + gtest(types.MethodType, "instancemethod(function, instance, class)") gtest(SB(), default_tip) def test_signature_wrap(self): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -400,11 +400,13 @@ lltype.render_immortal(ptr.c_ml_name) rffi.setintfield(ptr, 'c_ml_flags', METH_VARARGS | METH_KEYWORDS) ptr.c_ml_doc = rffi.cast(rffi.CONST_CCHARP, rffi.str2charp( - "T.__new__(S, ...) -> a new object with type S, a subtype of T")) + "Create and return a new object. " + "See help(type) for accurate signature.")) lltype.render_immortal(ptr.c_ml_doc) state.new_method_def = ptr return ptr + def setup_new_method_def(space): ptr = get_new_method_def(space) ptr.c_ml_meth = rffi.cast(PyCFunction, llslot(space, tp_new_wrapper)) diff --git a/pypy/objspace/std/boolobject.py b/pypy/objspace/std/boolobject.py --- a/pypy/objspace/std/boolobject.py +++ b/pypy/objspace/std/boolobject.py @@ -40,7 +40,7 @@ @staticmethod @unwrap_spec(w_obj=WrappedDefault(False)) def descr_new(space, w_booltype, w_obj): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." space.w_bool.check_user_subclass(w_booltype) return space.newbool(space.is_true(w_obj)) diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -658,7 +658,7 @@ """x.__imul__(y) <==> x*=y""" def __init__(): - """x.__init__(...) initializes x; see help(type(x)) for signature""" + """Initialize self. See help(type(self)) for accurate signature.""" def __iter__(): """x.__iter__() <==> iter(x)""" diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py --- a/pypy/objspace/std/intobject.py +++ b/pypy/objspace/std/intobject.py @@ -514,7 +514,7 @@ @staticmethod @unwrap_spec(w_x=WrappedDefault(0)) def descr_new(space, w_inttype, w_x, w_base=None): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." 
return _new_int(space, w_inttype, w_x, w_base) def descr_hash(self, space): diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -301,7 +301,7 @@ return self.strategy.find(self, w_item, start, end) def append(self, w_item): - """L.append(object) -- append object to end""" + """L.append(object) -> None -- append object to end""" self.strategy.append(self, w_item) def length(self): @@ -403,8 +403,7 @@ self.strategy.insert(self, index, w_item) def extend(self, w_iterable): - '''L.extend(iterable) -- extend list by appending - elements from the iterable''' + '''L.extend(iterable) -- extend list by appending elements from the iterable''' self.strategy.extend(self, w_iterable) def reverse(self): @@ -420,13 +419,13 @@ @staticmethod def descr_new(space, w_listtype, __args__): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." w_obj = space.allocate_instance(W_ListObject, w_listtype) w_obj.clear(space) return w_obj def descr_init(self, space, __args__): - """x.__init__(...) initializes x; see help(type(x)) for signature""" + """Initialize self. See help(type(self)) for accurate signature.""" # this is on the silly side w_iterable, = __args__.parse_obj( None, 'list', init_signature, init_defaults) @@ -603,8 +602,7 @@ self.reverse() def descr_count(self, space, w_value): - '''L.count(value) -> integer -- return number of - occurrences of value''' + '''L.count(value) -> integer -- return number of occurrences of value''' # needs to be safe against eq_w() mutating the w_list behind our back count = 0 i = 0 @@ -623,8 +621,8 @@ @unwrap_spec(index=int) def descr_pop(self, space, index=-1): - '''L.pop([index]) -> item -- remove and return item at - index (default last)''' + """L.pop([index]) -> item -- remove and return item at index (default last). +Raises IndexError if list is empty or index is out of range.""" length = self.length() if length == 0: raise oefmt(space.w_IndexError, "pop from empty list") @@ -639,7 +637,7 @@ raise oefmt(space.w_IndexError, "pop index out of range") def descr_clear(self, space): - '''L.clear() -- remove all items''' + """L.clear() -> None -- remove all items from L""" self.clear(space) def descr_copy(self, space): @@ -647,7 +645,8 @@ return self.clone() def descr_remove(self, space, w_value): - 'L.remove(value) -- remove first occurrence of value' + """L.remove(value) -> None -- remove first occurrence of value. +Raises ValueError if the value is not present.""" # needs to be safe against eq_w() mutating the w_list behind our back try: i = self.find(w_value, 0, sys.maxint) @@ -659,8 +658,8 @@ @unwrap_spec(w_start=WrappedDefault(0), w_stop=WrappedDefault(sys.maxint)) def descr_index(self, space, w_value, w_start, w_stop): - '''L.index(value, [start, [stop]]) -> integer -- return - first index of value''' + """L.index(value, [start, [stop]]) -> integer -- return first index of value. 
+Raises ValueError if the value is not present.""" # needs to be safe against eq_w() mutating the w_list behind our back size = self.length() i, stop = unwrap_start_stop(space, size, w_start, w_stop) @@ -673,8 +672,7 @@ @unwrap_spec(reverse=int) def descr_sort(self, space, w_key=None, reverse=False): - """ L.sort(key=None, reverse=False) -- stable - sort *IN PLACE*""" + """L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*""" has_key = not space.is_none(w_key) # create and setup a TimSort instance diff --git a/pypy/objspace/std/noneobject.py b/pypy/objspace/std/noneobject.py --- a/pypy/objspace/std/noneobject.py +++ b/pypy/objspace/std/noneobject.py @@ -9,7 +9,7 @@ @staticmethod def descr_new(space, w_type): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." return space.w_None def descr_bool(self, space): diff --git a/pypy/objspace/std/test/test_listobject.py b/pypy/objspace/std/test/test_listobject.py --- a/pypy/objspace/std/test/test_listobject.py +++ b/pypy/objspace/std/test/test_listobject.py @@ -445,8 +445,8 @@ def test_doc(self): assert list.__doc__ == "list() -> new empty list\nlist(iterable) -> new list initialized from iterable's items" - assert list.__new__.__doc__ == "T.__new__(S, ...) -> a new object with type S, a subtype of T" - assert list.__init__.__doc__ == "x.__init__(...) initializes x; see help(type(x)) for signature" + assert list.__new__.__doc__ == "Create and return a new object. See help(type) for accurate signature." + assert list.__init__.__doc__ == "Initialize self. See help(type(self)) for accurate signature." def test_getstrategyfromlist_w(self): l0 = ["a", "2", "a", True] From pypy.commits at gmail.com Tue Nov 21 18:57:52 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 21 Nov 2017 15:57:52 -0800 (PST) Subject: [pypy-commit] pypy default: update vmprof from upstream which cleans up most of the gcc warnings Message-ID: <5a14bd80.8e8bdf0a.78af8.e0f4@mx.google.com> Author: Matti Picus Branch: Changeset: r93121:2c9ec695ca2c Date: 2017-11-22 01:56 +0200 http://bitbucket.org/pypy/pypy/changeset/2c9ec695ca2c/ Log: update vmprof from upstream which cleans up most of the gcc warnings diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -12,6 +12,7 @@ #endif +#include "vmprof_common.h" #include "shared/vmprof_get_custom_offset.h" #ifdef VMPROF_UNIX @@ -30,7 +31,7 @@ } #endif -long vmprof_get_profile_path(const char * buffer, long size) +long vmprof_get_profile_path(char * buffer, long size) { return vmp_fd_to_path(vmp_profile_fileno(), buffer, size); } diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -36,8 +36,8 @@ RPY_EXTERN int vmprof_stack_append(void*, long); RPY_EXTERN long vmprof_stack_pop(void*); RPY_EXTERN void vmprof_stack_free(void*); -RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, intptr_t*, intptr_t); -RPY_EXTERN long vmprof_get_profile_path(const char *, long); +RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, void**, intptr_t); +RPY_EXTERN long vmprof_get_profile_path(char *, long); RPY_EXTERN int vmprof_stop_sampling(void); RPY_EXTERN void vmprof_start_sampling(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- 
a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -262,7 +262,7 @@ } int depth = 0; - PY_STACK_FRAME_T * top_most_frame = frame; + //PY_STACK_FRAME_T * top_most_frame = frame; while ((depth + _per_loop()) <= max_depth) { unw_get_proc_info(&cursor, &pip); @@ -400,7 +400,7 @@ if (fd == NULL) { return 0; } - char * saveptr; + char * saveptr = NULL; char * line = NULL; char * he = NULL; char * name; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -4,6 +4,9 @@ #include #ifdef RPYTHON_VMPROF + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc); + #ifdef RPYTHON_LL2CTYPES /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */ @@ -193,7 +196,7 @@ #endif intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length) + void **result_p, intptr_t result_length) { int n; int enabled; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -96,7 +96,7 @@ #endif RPY_EXTERN intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length); + void **result_p, intptr_t result_length); #endif int vmprof_get_signal_type(void); diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,8 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + eci = ExternalCompilationInfo(compile_extra=['-g','-O0', '-Werror'], + post_include_bits = ['int native_func(int);'], separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Wed Nov 22 13:32:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 10:32:16 -0800 (PST) Subject: [pypy-commit] pypy py3.5: io.BufferedRandom also uses the new readinto() implementation Message-ID: <5a15c2b0.d1911c0a.8150a.016b@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93122:70b9b696e219 Date: 2017-11-22 18:32 +0000 http://bitbucket.org/pypy/pypy/changeset/70b9b696e219/ Log: io.BufferedRandom also uses the new readinto() implementation diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -869,19 +869,14 @@ finally: self._reader_reset_buf() -class W_BufferedReader(BufferedMixin, W_BufferedIOBase): - @unwrap_spec(buffer_size=int) - def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): - self.state = STATE_ZERO - check_readable_w(space, w_raw) +class BufferedReaderMixin(BufferedMixin): + _mixin_ = True - self.w_raw = w_raw - self.buffer_size = buffer_size - self.readable = True + def readinto_w(self, space, w_buffer): + return self._readinto(space, w_buffer, read_once=False) - self._init(space) - self._reader_reset_buf() - self.state = STATE_OK + def readinto1_w(self, space, w_buffer): + return self._readinto(space, w_buffer, read_once=True) def _readinto(self, space, w_buffer, read_once): rwbuffer = space.writebuf_w(w_buffer) @@ -904,7 +899,8 @@ self.pos = 0 if 
written + len(self.buffer) < length: try: - got = self._raw_read(space, rwbuffer, written, length - written) + got = self._raw_read( + space, rwbuffer, written, length - written) written += got except BlockingIOError: got = 0 @@ -929,6 +925,19 @@ return space.newint(written) +class W_BufferedReader(BufferedReaderMixin, W_BufferedIOBase): + @unwrap_spec(buffer_size=int) + def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): + self.state = STATE_ZERO + check_readable_w(space, w_raw) + + self.w_raw = w_raw + self.buffer_size = buffer_size + self.readable = True + + self._init(space) + self._reader_reset_buf() + self.state = STATE_OK W_BufferedReader.typedef = TypeDef( '_io.BufferedReader', W_BufferedIOBase.typedef, @@ -939,6 +948,8 @@ read = interp2app(W_BufferedReader.read_w), peek = interp2app(W_BufferedReader.peek_w), read1 = interp2app(W_BufferedReader.read1_w), + readinto = interp2app(W_BufferedReader.readinto_w), + readinto1 = interp2app(W_BufferedReader.readinto1_w), raw = interp_attrproperty_w("w_raw", cls=W_BufferedReader), readline = interp2app(W_BufferedReader.readline_w), @@ -1100,7 +1111,7 @@ **methods ) -class W_BufferedRandom(BufferedMixin, W_BufferedIOBase): +class W_BufferedRandom(BufferedReaderMixin, W_BufferedIOBase): @unwrap_spec(buffer_size=int) def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): self.state = STATE_ZERO @@ -1128,6 +1139,8 @@ peek = interp2app(W_BufferedRandom.peek_w), read1 = interp2app(W_BufferedRandom.read1_w), readline = interp2app(W_BufferedRandom.readline_w), + readinto = interp2app(W_BufferedRandom.readinto_w), + readinto1 = interp2app(W_BufferedRandom.readinto1_w), write = interp2app(W_BufferedRandom.write_w), flush = interp2app(W_BufferedRandom.flush_w), diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -199,6 +199,19 @@ def readinto(self, buf): buf[:3] = b"abc" return 3 + + def writable(self): + return True + + def write(self, b): + return len(b) + + def seekable(self): + return True + + def seek(self, pos, whence): + return 0 + bufio = _io.BufferedReader(MockIO(), buffer_size=5) buf = bytearray(10) bufio.read(2) @@ -223,6 +236,15 @@ assert n == 1 assert buf[:n] == b'c' + bufio = _io.BufferedRandom(MockIO(), buffer_size=10) + buf = bytearray(20) + bufio.peek(3) + assert bufio.readinto1(buf) == 6 + assert buf[:6] == b'abcabc' + + bufio = _io.BufferedWriter(MockIO(), buffer_size=10) + raises(_io.UnsupportedOperation, bufio.readinto1, bytearray(10)) + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Wed Nov 22 16:04:45 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:04:45 -0800 (PST) Subject: [pypy-commit] buildbot default: use os tools to clean out old virtualenv (untested) Message-ID: <5a15e66d.923e1c0a.75bd2.001d@mx.google.com> Author: Matti Picus Branch: Changeset: r1039:0b37e98f8694 Date: 2017-11-08 03:00 +0200 http://bitbucket.org/pypy/buildbot/changeset/0b37e98f8694/ Log: use os tools to clean out old virtualenv (untested) diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -467,16 +467,23 @@ timeout=4000, env={"TMPDIR": Interpolate('%(prop:target_tmpdir)s' + pytest), })) + if platform == 'win32': + virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' + clean = 'rmdir /s /q pypy-venv' + else: + virt_pypy = 
'../venv/pypy-venv/bin/python' + clean = 'rm -rf pypy-env' + factory.addStep(ShellCmd( + description="clean old virtualenv", + command=clean, + workdir='venv', + haltOnFailure=False)) factory.addStep(ShellCmd( description="Create virtualenv", command=prefix + ['virtualenv', '--clear', '-p', Property('target_path'), 'pypy-venv'], workdir='venv', flunkOnFailure=True)) - if platform == 'win32': - virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' - else: - virt_pypy = '../venv/pypy-venv/bin/python' factory.addStep(ShellCmd( description="Install extra tests requirements", command=prefix + [virt_pypy, '-m', 'pip', 'install', From pypy.commits at gmail.com Wed Nov 22 16:04:48 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:04:48 -0800 (PST) Subject: [pypy-commit] buildbot default: use upgraded virtualenv for pypy -A tests, clean out old virtualenv's Message-ID: <5a15e670.57addf0a.5fc48.c014@mx.google.com> Author: Matti Picus Branch: Changeset: r1040:be8418f6ed85 Date: 2017-11-22 23:03 +0200 http://bitbucket.org/pypy/buildbot/changeset/be8418f6ed85/ Log: use upgraded virtualenv for pypy -A tests, clean out old virtualenv's diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -473,15 +473,21 @@ else: virt_pypy = '../venv/pypy-venv/bin/python' clean = 'rm -rf pypy-env' + target = Property('target_path') factory.addStep(ShellCmd( description="clean old virtualenv", command=clean, workdir='venv', haltOnFailure=False)) factory.addStep(ShellCmd( + description="Install recent virtualenv", + command=prefix + [target, '-mpip', 'install', '--upgrade', + 'virtualenv'], + workdir='venv', + flunkOnFailure=True)) + factory.addStep(ShellCmd( description="Create virtualenv", - command=prefix + ['virtualenv', '--clear', '-p', - Property('target_path'), 'pypy-venv'], + command=prefix + [target, '-mvirtualenv', '--clear', 'pypy-venv'], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( @@ -555,17 +561,22 @@ haltOnFailure=False, )) + if platform == 'win32': + self.virt_python = r'virt_test\Scripts\python.exe' + clean = 'rmdir /s /q virt-test' + else: + self.virt_python = 'virt_test/bin/python' + clean = 'rm -rf virt-test' + self.addStep(ShellCmd( + description="clean old virtualenv", + command=clean, + haltOnFailure=False)) self.addStep(ShellCmd( description="create virtualenv for tests", command=['virtualenv', 'virt_test'], haltOnFailure=True, )) - if platform == 'win32': - self.virt_python = r'virt_test\Scripts\python.exe' - else: - self.virt_python = 'virt_test/bin/python' - self.addStep(ShellCmd( description="install requirments to virtual environment", command=[self.virt_python, '-mpip', 'install', '-r', From pypy.commits at gmail.com Wed Nov 22 16:10:29 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:10:29 -0800 (PST) Subject: [pypy-commit] buildbot default: typo (arigato) Message-ID: <5a15e7c5.8faedf0a.ec3e7.a90a@mx.google.com> Author: Matti Picus Branch: Changeset: r1041:b9268dadd68a Date: 2017-11-22 23:10 +0200 http://bitbucket.org/pypy/buildbot/changeset/b9268dadd68a/ Log: typo (arigato) diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -472,7 +472,7 @@ clean = 'rmdir /s /q pypy-venv' else: virt_pypy = '../venv/pypy-venv/bin/python' - clean = 'rm -rf pypy-env' + clean = 'rm -rf pypy-venv' target = Property('target_path') factory.addStep(ShellCmd( description="clean 
old virtualenv", From pypy.commits at gmail.com Wed Nov 22 17:22:26 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 22 Nov 2017 14:22:26 -0800 (PST) Subject: [pypy-commit] pypy default: Untested and hard-to-test, but for symmetry reasons if we Message-ID: <5a15f8a2.b198df0a.c8d51.0f43@mx.google.com> Author: Armin Rigo Branch: Changeset: r93123:386b50664e3e Date: 2017-11-22 23:21 +0100 http://bitbucket.org/pypy/pypy/changeset/386b50664e3e/ Log: Untested and hard-to-test, but for symmetry reasons if we don't call start_sampling() here then it means sampling will not restart after some switches diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -26,12 +26,14 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: cintf.empty_rvmprof_stack() h = self._gcrootfinder.new(self, callback, arg) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h From pypy.commits at gmail.com Wed Nov 22 17:44:28 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 22 Nov 2017 14:44:28 -0800 (PST) Subject: [pypy-commit] pypy default: * Be more careful and let stop_sampling()/start_sampling() be called in code Message-ID: <5a15fdcc.48d31c0a.f36c0.b5b8@mx.google.com> Author: Armin Rigo Branch: Changeset: r93124:1cc101a9ee5a Date: 2017-11-22 23:43 +0100 http://bitbucket.org/pypy/pypy/changeset/1cc101a9ee5a/ Log: * Be more careful and let stop_sampling()/start_sampling() be called in code that is not compiled with rvmprof. This is needed from rstacklet; previously, it would fail translation on any non-rvmprof- supported platform as soon as rstacklet is used. * We already call a function from vmprof in rstacklet.py. No point in calling another one, when we can make the function have both effects. 
diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,6 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -26,14 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: cintf.empty_rvmprof_stack() h = self._gcrootfinder.new(self, callback, arg) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h @@ -43,13 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,8 +56,10 @@ return None def stop_sampling(): - fd = _get_vmprof().cintf.vmprof_stop_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling + fd = vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) def start_sampling(): - _get_vmprof().cintf.vmprof_start_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_start_sampling + vmprof_start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -40,7 +40,7 @@ compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_LINUX'] elif sys.platform == 'win32': - compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] + compile_extra += ['-DVMPROF_WINDOWS'] separate_module_files = [SHARED.join('vmprof_win.c')] _libs = [] else: @@ -120,16 +120,26 @@ vmprof_get_profile_path = rffi.llexternal("vmprof_get_profile_path", [rffi.CCHARP, lltype.Signed], lltype.Signed, compilation_info=eci, _nowrapper=True) - vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=eci, - _nowrapper=True) - vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=eci, - _nowrapper=True) return CInterface(locals()) +# this is always present, but compiles to no-op if RPYTHON_VMPROF is not +# defined (i.e. 
if we don't actually use vmprof in the generated C) +auto_eci = ExternalCompilationInfo(post_include_bits=[""" +#ifndef RPYTHON_VMPROF +# define vmprof_stop_sampling() (-1) +# define vmprof_start_sampling() ((void)0) +#endif +"""]) + +vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=auto_eci, + _nowrapper=True) +vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=auto_eci, + _nowrapper=True) + class CInterface(object): def __init__(self, namespace): @@ -218,6 +228,7 @@ # stacklet support def save_rvmprof_stack(): + vmprof_stop_sampling() return vmprof_tl_stack.get_or_make_raw() def empty_rvmprof_stack(): @@ -225,6 +236,7 @@ def restore_rvmprof_stack(x): vmprof_tl_stack.setraw(x) + vmprof_start_sampling() # # traceback support From pypy.commits at gmail.com Wed Nov 22 17:46:29 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 14:46:29 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Prevent test from crashing for an unrelated reason Message-ID: <5a15fe45.968ddf0a.433a7.de3f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93125:9a354884fd09 Date: 2017-11-22 22:46 +0000 http://bitbucket.org/pypy/pypy/changeset/9a354884fd09/ Log: Prevent test from crashing for an unrelated reason diff --git a/lib-python/3/test/test_descr.py b/lib-python/3/test/test_descr.py --- a/lib-python/3/test/test_descr.py +++ b/lib-python/3/test/test_descr.py @@ -4278,7 +4278,10 @@ c = C() c.__dict__[Evil()] = 0 - self.assertEqual(c.attr, 1) + try: + self.assertEqual(c.attr, 1) + except AttributeError: # when Evil.__eq__ is called twice + pass # this makes a crash more likely: support.gc_collect() self.assertNotHasAttr(c, 'attr') From pypy.commits at gmail.com Wed Nov 22 17:50:48 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 22 Nov 2017 14:50:48 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: in progress io Message-ID: <5a15ff48.02b8df0a.6f8fa.3148@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93126:559a0a0bb302 Date: 2017-11-22 23:50 +0100 http://bitbucket.org/pypy/pypy/changeset/559a0a0bb302/ Log: in progress io diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1779,6 +1779,9 @@ assert not hasattr(self, 'is_fake_objspace') return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + def utf8_len_w(self, w_obj): + w_obj = self.convert_arg_to_w_unicode(w_obj) + return w_obj._utf8, w_obj._len() def realutf8_w(self, w_obj): # Like utf8_w(), but only works if w_obj is really of type diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -10,7 +10,8 @@ from pypy.module._io.interp_iobase import W_IOBase, convert_size, trap_eintr from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -29,17 +30,22 @@ def __init__(self, space): self.w_newlines_dict = { - SEEN_CR: space.newunicode(u"\r"), - SEEN_LF: space.newunicode(u"\n"), - SEEN_CRLF: space.newunicode(u"\r\n"), + SEEN_CR: space.newutf8("\r", 1, FLAG_ASCII), + SEEN_LF: space.newutf8("\n", 1, FLAG_ASCII), + SEEN_CRLF: space.newutf8("\r\n", 2, 
FLAG_ASCII), SEEN_CR | SEEN_LF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\n", 1, FLAG_ASCII)]), SEEN_CR | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\r\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\n"), space.newunicode(u"\r\n")]), + [space.newutf8("\n", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\n"), space.newunicode(u"\r\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\n", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), } @unwrap_spec(translate=int) @@ -73,25 +79,25 @@ raise oefmt(space.w_TypeError, "decoder should return a string result") - output = space.unicode_w(w_output) + output, output_len = space.utf8_len_w(w_output) output_len = len(output) if self.pendingcr and (final or output_len): - output = u'\r' + output + output = '\r' + output self.pendingcr = False output_len += 1 # retain last \r even when not translating data: # then readline() is sure to get \r\n in one pass if not final and output_len > 0: - last = output_len - 1 + last = len(output) - 1 assert last >= 0 - if output[last] == u'\r': + if output[last] == '\r': output = output[:last] self.pendingcr = True output_len -= 1 if output_len == 0: - return space.newunicode(u"") + return space.newutf8("", 1, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -101,52 +107,53 @@ # for the \r only_lf = False if seennl == SEEN_LF or seennl == 0: - only_lf = (output.find(u'\r') < 0) + only_lf = (output.find('\r') < 0) if only_lf: # If not already seen, quick scan for a possible "\n" character. # (there's nothing else to be done, even when in translation mode) - if seennl == 0 and output.find(u'\n') >= 0: + if seennl == 0 and output.find('\n') >= 0: seennl |= SEEN_LF # Finished: we have scanned for newlines, and none of them # need translating. elif not self.translate: i = 0 - while i < output_len: + while i < len(output): if seennl == SEEN_ALL: break c = output[i] i += 1 - if c == u'\n': + if c == '\n': seennl |= SEEN_LF - elif c == u'\r': - if i < output_len and output[i] == u'\n': + elif c == '\r': + if i < len(output) and output[i] == '\n': seennl |= SEEN_CRLF i += 1 else: seennl |= SEEN_CR - elif output.find(u'\r') >= 0: + elif output.find('\r') >= 0: # Translate! 
- builder = UnicodeBuilder(output_len) + builder = StringBuilder(len(output)) i = 0 while i < output_len: c = output[i] i += 1 - if c == u'\n': + if c == '\n': seennl |= SEEN_LF - elif c == u'\r': - if i < output_len and output[i] == u'\n': + elif c == '\r': + if i < len(output) and output[i] == '\n': seennl |= SEEN_CRLF i += 1 else: seennl |= SEEN_CR - builder.append(u'\n') + builder.append('\n') continue builder.append(c) output = builder.build() self.seennl |= seennl - return space.newunicode(output) + lgt, flag = check_utf8(output, True) + return space.newutf8(output, lgt, flag) def reset_w(self, space): self.seennl = 0 @@ -373,8 +380,8 @@ if space.is_none(w_newline): newline = None else: - newline = space.unicode_w(w_newline) - if newline and newline not in (u'\n', u'\r\n', u'\r'): + newline = space.utf8_w(w_newline) + if newline and newline not in ('\n', '\r\n', '\r'): raise oefmt(space.w_ValueError, "illegal newline value: %R", w_newline) @@ -384,13 +391,13 @@ self.readtranslate = newline is None self.readnl = newline - self.writetranslate = (newline != u'') + self.writetranslate = (newline != '') if not self.readuniversal: self.writenl = self.readnl - if self.writenl == u'\n': + if self.writenl == '\n': self.writenl = None elif _WINDOWS: - self.writenl = u"\r\n" + self.writenl = "\r\n" else: self.writenl = None @@ -519,7 +526,7 @@ def _get_decoded_chars(self, size): if self.decoded_chars is None: - return u"" + return "" available = len(self.decoded_chars) - self.decoded_chars_used if size < 0 or size > available: @@ -574,7 +581,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded_chars(space.utf8_w(w_decoded)) if space.len_w(w_decoded) > 0: eof = False @@ -745,20 +752,19 @@ raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_text) - text = space.unicode_w(w_text) - textlen = len(text) + text, textlen = space.utf8_len_w(w_text) haslf = False if (self.writetranslate and self.writenl) or self.line_buffering: - if text.find(u'\n') >= 0: + if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), space.newunicode(self.writenl)) - text = space.unicode_w(w_text) + text = space.utf8_w(w_text) needflush = False - if self.line_buffering and (haslf or text.find(u'\r') >= 0): + if self.line_buffering and (haslf or text.find('\r') >= 0): needflush = True # XXX What if we were just reading? From pypy.commits at gmail.com Wed Nov 22 23:16:09 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 20:16:09 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Do some unicode>utf8 conversions in interp_textio Message-ID: <5a164b89.169a1c0a.63814.9b51@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93128:a8f461710bf8 Date: 2017-11-23 03:34 +0000 http://bitbucket.org/pypy/pypy/changeset/a8f461710bf8/ Log: Do some unicode>utf8 conversions in interp_textio diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -97,7 +97,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 1, FLAG_ASCII) + return space.newutf8("", 0, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. 
@@ -226,7 +226,7 @@ if self.readtranslate: # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) + pos = line.find('\n', start, end) if pos >= 0: return pos - start + 1, 0 else: @@ -617,13 +617,13 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self._get_decoded_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = UnicodeBuilder(size) + builder = StringBuilder(size) # Keep reading chunks until we have n characters to return while True: @@ -643,7 +643,7 @@ continue raise - return space.newunicode(builder.build()) + return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -731,12 +731,12 @@ if chunks: if line: chunks.append(line) - line = u''.join(chunks) + line = ''.join(chunks) if line: - return space.newunicode(line) + return space.new_from_utf8(line) else: - return space.newunicode(u'') + return space.newutf8('', 0, FLAG_ASCII) # _____________________________________________________________ # write methods @@ -759,8 +759,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), - space.newunicode(self.writenl)) + w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), + space.new_from_utf8(self.writenl)) text = space.utf8_w(w_text) needflush = False @@ -982,7 +982,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 From pypy.commits at gmail.com Wed Nov 22 23:16:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 20:16:07 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Add (back) convenience methods space.newunicode(), space.new_from_utf8() and Message-ID: <5a164b87.83b91c0a.f6cd7.8e07@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93127:b89046216269 Date: 2017-11-23 03:14 +0000 http://bitbucket.org/pypy/pypy/changeset/b89046216269/ Log: Add (back) convenience methods space.newunicode(), space.new_from_utf8() and space.unicode_w() diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -272,7 +272,7 @@ self._typed_unwrap_error(space, "unicode") def convert_to_w_unicode(self, space): - self._typed_unwrap_error(space, "unicode") + self._typed_unwrap_error(space, "unicode") def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -1759,6 +1759,11 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + + def unicode_w(self, w_obj): + # XXX: kill me! + return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! 
+ assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! + assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Thu Nov 23 00:14:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 21:14:51 -0800 (PST) Subject: [pypy-commit] pypy default: Refactor interp_textio.py a little Message-ID: <5a16594b.8b8a1c0a.47f9e.aaf0@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93129:6eab39056eb5 Date: 2017-11-23 05:14 +0000 http://bitbucket.org/pypy/pypy/changeset/6eab39056eb5/ Log: Refactor interp_textio.py a little diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -217,30 +217,28 @@ def _find_line_ending(self, line, start, end): size = end - start if self.readtranslate: - # Newlines are already translated, only search for \n pos = line.find(u'\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -248,7 +246,7 @@ # Non-universal mode. 
pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -513,8 +511,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.unicode_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -573,8 +576,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -664,7 +666,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -683,7 +685,6 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -709,7 +710,7 @@ remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -861,7 +862,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -886,7 +887,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. 
@@ -907,8 +908,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -976,7 +976,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 From pypy.commits at gmail.com Thu Nov 23 01:06:45 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:06:45 -0800 (PST) Subject: [pypy-commit] pypy default: Use a UnicodeBuilder in _io.TextIOWrapper.readline Message-ID: <5a166575.103e1c0a.4e643.533d@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93130:870515a86876 Date: 2017-11-23 06:06 +0000 http://bitbucket.org/pypy/pypy/changeset/870515a86876/ Log: Use a UnicodeBuilder in _io.TextIOWrapper.readline diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -646,11 +646,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = UnicodeBuilder() while True: # First, get some data if necessary @@ -684,6 +683,7 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked @@ -702,8 +702,8 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: @@ -719,18 +719,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return space.newunicode(result) # _____________________________________________________________ # write methods From pypy.commits at gmail.com Thu Nov 23 01:42:30 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:42:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Refactor interp_textio.py a little Message-ID: <5a166dd6.22a8df0a.e5ef.d731@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93131:031e80f0a68e Date: 2017-11-23 05:14 +0000 http://bitbucket.org/pypy/pypy/changeset/031e80f0a68e/ Log: Refactor interp_textio.py a little diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -224,30 +224,28 @@ def _find_line_ending(self, line, start, end): size = end 
- start if self.readtranslate: - # Newlines are already translated, only search for \n pos = line.find('\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -255,7 +253,7 @@ # Non-universal mode. pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -520,8 +518,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.utf8_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -580,8 +583,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.utf8_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -671,7 +673,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -690,7 +692,6 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -716,7 +717,7 @@ remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -867,7 +868,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -892,7 +893,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. 
@@ -913,8 +914,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: From pypy.commits at gmail.com Thu Nov 23 01:42:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:42:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Use a UnicodeBuilder in _io.TextIOWrapper.readline Message-ID: <5a166dd8.0eef1c0a.3a2c3.08c2@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93132:8c2553a25336 Date: 2017-11-23 06:06 +0000 http://bitbucket.org/pypy/pypy/changeset/8c2553a25336/ Log: Use a UnicodeBuilder in _io.TextIOWrapper.readline diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -653,11 +653,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = StringBuilder() while True: # First, get some data if necessary @@ -691,6 +690,7 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked @@ -709,8 +709,8 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: @@ -726,18 +726,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = ''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.new_from_utf8(line) - else: - return space.newutf8('', 0, FLAG_ASCII) + result = builder.build() + return space.new_from_utf8(result) # _____________________________________________________________ # write methods From pypy.commits at gmail.com Thu Nov 23 04:27:49 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:27:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Tweak the unicode FLAG_xx values for performance; collapse two identical helpers; move combine_flags() to rutf8 Message-ID: <5a169495.54d91c0a.8efdd.63ae@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93133:a1cf21d7a124 Date: 2017-11-23 10:24 +0100 http://bitbucket.org/pypy/pypy/changeset/a1cf21d7a124/ Log: Tweak the unicode FLAG_xx values for performance; collapse two identical helpers; move combine_flags() to rutf8 diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,6 +3,7 @@ from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 +from rpython.rlib.rutf8 import combine_flags from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -43,14 +44,6 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, 
encoding, errors) -def combine_flags(one, two): - if one == rutf8.FLAG_ASCII and two == rutf8.FLAG_ASCII: - return rutf8.FLAG_ASCII - elif (one == rutf8.FLAG_HAS_SURROGATES or - two == rutf8.FLAG_HAS_SURROGATES): - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - def _has_surrogate(u): for c in u: @@ -788,7 +781,8 @@ # first surrogate surrogate = outCh else: - flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) + flag = combine_flags(flag, + rutf8.get_flag_from_code(outCh)) outsize += 1 assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -356,7 +356,7 @@ elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) return W_UnicodeObject(builder.build(), self._length, flag) @@ -381,7 +381,7 @@ else: ch = unicodedb.tolower(ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -407,7 +407,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) - flag = unicodehelper.combine_flags(flag, w_newval._get_flag()) + flag = rutf8.combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -416,7 +416,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -540,7 +540,7 @@ while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -642,7 +642,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - flag = unicodehelper.combine_flags(self._get_flag(), w_other._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, self._len() + w_other._len(), flag) @@ -667,7 +667,7 @@ # XXX Maybe the extra copy here is okay? 
It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) - flag = unicodehelper.combine_flags(flag, w_u._get_flag()) + flag = rutf8.combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -719,7 +719,7 @@ uchar = rutf8.codepoint_at_pos(value, i) uchar = unicodedb.toupper(uchar) if uchar >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) rutf8.unichr_as_utf8_append(builder, uchar) return W_UnicodeObject(builder.build(), self._length, flag) @@ -833,14 +833,14 @@ ch = unicodedb.toupper(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) @@ -926,7 +926,7 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - flag = unicodehelper.combine_flags(self._get_flag(), w_by._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) return W_UnicodeObject(res, newlength, flag) @@ -1048,7 +1048,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") - flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -1067,7 +1067,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") - flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -50,6 +50,7 @@ def unichr_as_utf8_append(builder, code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string and emit the result into the given StringBuilder. + Raises ValueError if the code is outside range(0x110000). """ code = r_uint(code) if code <= r_uint(0x7F): @@ -124,13 +125,6 @@ continuation_bytes += 1 return len(s) - continuation_bytes -def get_flag_from_code(oc): - if oc <= 0x7F: - return FLAG_ASCII - if 0xD800 <= oc <= 0xDFFF: - return FLAG_HAS_SURROGATES - return FLAG_REGULAR - def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! 
""" @@ -453,22 +447,24 @@ UTF8_INDEX_STORAGE = lltype.GcStruct('utf8_loc', ('flag', lltype.Signed), - ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct( - 'utf8_loc_elem', - ('baseindex', lltype.Signed), - ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) - )))) + ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct('utf8_loc_elem', + ('baseindex', lltype.Signed), + ('ofs', lltype.FixedSizeArray(lltype.Char, 16)), + ))))) -def unichr_to_flag(ch): - if ch <= 0x7F: +def get_flag_from_code(oc): + if oc <= 0x7F: return FLAG_ASCII - elif 0xD800 <= ch <= 0xDFFF: + if 0xD800 <= oc <= 0xDFFF: return FLAG_HAS_SURROGATES return FLAG_REGULAR -FLAG_REGULAR = 0 -FLAG_HAS_SURROGATES = 1 -FLAG_ASCII = 2 +def combine_flags(one, two): + return one | two + +FLAG_ASCII = 0 # no bits +FLAG_REGULAR = 1 # bit 0 +FLAG_HAS_SURROGATES = 3 # bit 0 and bit 1 # note that we never need index storage if we're pure ascii, but it's useful # for passing into W_UnicodeObject.__init__ From pypy.commits at gmail.com Thu Nov 23 04:27:51 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:27:51 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge heads Message-ID: <5a169497.cb921c0a.f57f4.8bf0@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93134:25ac6121d03c Date: 2017-11-23 10:26 +0100 http://bitbucket.org/pypy/pypy/changeset/25ac6121d03c/ Log: merge heads diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -272,7 +272,7 @@ self._typed_unwrap_error(space, "unicode") def convert_to_w_unicode(self, space): - self._typed_unwrap_error(space, "unicode") + self._typed_unwrap_error(space, "unicode") def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -1759,6 +1759,11 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + + def unicode_w(self, w_obj): + # XXX: kill me! + return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -97,7 +97,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 1, FLAG_ASCII) + return space.newutf8("", 0, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -224,30 +224,28 @@ def _find_line_ending(self, line, start, end): size = end - start if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) + pos = line.find('\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. 
+ while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -255,7 +253,7 @@ # Non-universal mode. pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -520,8 +518,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.utf8_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -580,8 +583,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.utf8_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -617,13 +619,13 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self._get_decoded_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = UnicodeBuilder(size) + builder = StringBuilder(size) # Keep reading chunks until we have n characters to return while True: @@ -643,7 +645,7 @@ continue raise - return space.newunicode(builder.build()) + return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -651,11 +653,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = StringBuilder() while True: # First, get some data if necessary @@ -671,7 +672,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -689,8 +690,8 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -708,15 +709,15 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -725,18 +726,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return 
space.new_from_utf8(result) # _____________________________________________________________ # write methods @@ -759,8 +754,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), - space.newunicode(self.writenl)) + w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), + space.new_from_utf8(self.writenl)) text = space.utf8_w(w_text) needflush = False @@ -867,7 +862,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -892,7 +887,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -913,8 +908,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -982,7 +976,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! 
+ assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Thu Nov 23 04:33:47 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:33:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Tests and fixes for 'allow_surrogates=True' in various unicode methods Message-ID: <5a1695fb.7996df0a.4610b.dcfb@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93135:16bfad77e3d5 Date: 2017-11-23 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/16bfad77e3d5/ Log: Tests and fixes for 'allow_surrogates=True' in various unicode methods diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -299,6 +299,7 @@ assert u"Brown Fox".title() == u"Brown Fox" assert u"bro!wn fox".title() == u"Bro!Wn Fox" assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): assert u"".istitle() == False @@ -328,10 +329,12 @@ assert u'A'.lower() == u'a' assert u'\u0105'.lower() == u'\u0105' assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' assert u'a'.upper() == u'A' assert u'A'.upper() == u'A' assert u'\u0105'.upper() == u'\u0104' assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' def test_capitalize(self): assert u"brown fox".capitalize() == u"Brown fox" @@ -354,6 +357,8 @@ # check with Ll chars with no upper - nothing changes here assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == u'\u019b\u1d00\u1d86\u0221\u1fb7') + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): s = u"abc" @@ -844,6 +849,7 @@ def test_swapcase(self): assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf' + assert u'\ud800'.swapcase() == u'\ud800' def test_buffer(self): buf = buffer(u'XY') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -357,7 +357,7 @@ ch = unicodedb.toupper(ch) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) return W_UnicodeObject(builder.build(), self._length, flag) def descr_title(self, space): @@ -382,7 +382,7 @@ ch = unicodedb.tolower(ch) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -541,7 +541,7 @@ lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? 
+ rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -721,7 +721,7 @@ if uchar >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar) + rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) return W_UnicodeObject(builder.build(), self._length, flag) @unwrap_spec(width=int) @@ -831,14 +831,14 @@ uchar = rutf8.codepoint_at_pos(value, 0) i = rutf8.next_codepoint_pos(value, 0) ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) From pypy.commits at gmail.com Thu Nov 23 04:48:57 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:48:57 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Review for surrogates Message-ID: <5a169989.93131c0a.19af0.57e5@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93136:dc6582a05b85 Date: 2017-11-23 10:48 +0100 http://bitbucket.org/pypy/pypy/changeset/dc6582a05b85/ Log: Review for surrogates diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -370,14 +370,15 @@ builder.append(res) else: # when we get here, chr is a 32-bit unicode character - if chr > 0x10ffff: + try: + rutf8.unichr_as_utf8_append(builder, intmask(chr), True) + except ValueError: message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) size, flag = rutf8.check_utf8(res, True) builder.append(res) else: - rutf8.unichr_as_utf8_append(builder, intmask(chr), True) flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 @@ -466,7 +467,7 @@ pos += 1 x = (x<<3) + ord(ch) - ord('0') outsize += 1 - if x >= 0x7F: + if x > 0x7F: rutf8.unichr_as_utf8_append(builder, x) flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: @@ -524,7 +525,9 @@ pos = look + 1 outsize += 1 flag = combine_flags(flag, rutf8.get_flag_from_code(code)) - rutf8.unichr_as_utf8_append(builder, code) + rutf8.unichr_as_utf8_append(builder, code, + allow_surrogates=True) + # xxx 'code' is probably always within range here... 
else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) @@ -772,7 +775,8 @@ surrogate = 0 continue else: - rutf8.unichr_as_utf8_append(result, surrogate) + rutf8.unichr_as_utf8_append(result, surrogate, + allow_surrogates=True) flag = rutf8.FLAG_HAS_SURROGATES outsize += 1 surrogate = 0 @@ -1236,7 +1240,7 @@ result.append(r) continue - rutf8.unichr_as_utf8_append(result, ch) + rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True) pos += 4 r = result.build() lgt, flag = rutf8.check_utf8(r, True) @@ -1360,7 +1364,7 @@ s, pos, pos + unicode_bytes) result.append(res) continue - rutf8.unichr_as_utf8_append(result, intmask(t)) + rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) pos += unicode_bytes r = result.build() lgt, flag = rutf8.check_utf8(r, True) diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -127,7 +127,7 @@ errorcb, namecb, stringdata) src = pypy_cjk_dec_outbuf(decodebuf) length = pypy_cjk_dec_outlen(decodebuf) - return rffi.wcharpsize2utf8(src, length) + return rffi.wcharpsize2utf8(src, length) # assumes no out-of-range chars def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata): diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1012,6 +1012,7 @@ def wcharpsize2utf8(w, size): """ Helper to convert WCHARP pointer to utf8 in one go. Equivalent to wcharpsize2unicode().encode("utf8") + Raises ValueError if characters are outside range(0x110000)! """ from rpython.rlib import rutf8 From pypy.commits at gmail.com Thu Nov 23 09:41:28 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 06:41:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Fixes for _cffi_backend Message-ID: <5a16de18.499edf0a.e853.9322@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93137:a94b5860dbb3 Date: 2017-11-23 15:40 +0100 http://bitbucket.org/pypy/pypy/changeset/a94b5860dbb3/ Log: Fixes for _cffi_backend diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -64,13 +64,10 @@ elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_value) - if self.citem.size == 4: + if self.ctitem.size == 2: + length = wchar_helper.utf8_size_as_char16(w_u._utf8) + else: length = w_u._len() - else: - if not w_u._has_surrogates(): - length = w_u._len() - else: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -40,16 +40,13 @@ return ord(s[0]) def cast_unicode(self, w_ob): - import pdb - pdb.set_trace() space = self.space w_u = space.convert_arg_to_w_unicode(w_ob) if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", w_u._len(), self.name) - ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) - return intmask(ordinal) + return rutf8.codepoint_at_pos(w_u._utf8, 0) def cast(self, w_ob): from 
pypy.module._cffi_backend import ctypeptr @@ -175,21 +172,19 @@ return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - if self.is_signed_wchar: - code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) - return self.space.newutf8( - rutf8.unichr_as_utf8(code), 1, - rutf8.get_flag_from_code(code)) - else: - value = misc.read_raw_ulong_data(cdata, self.size) # r_uint - try: - u = wchar_helper.ordinal_to_unicode(value) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, - rutf8.get_flag_from_code(ord(u))) + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + try: + utf8 = rutf8.unichr_as_utf8(value, allow_surrogates=True) + except ValueError: + if self.is_signed_wchar: + s = hex(intmask(value)) + else: + s = hex(value) + raise oefmt(self.space.w_ValueError, + "%s out of range for conversion to unicode: %s", + self.name, s) + flag = rutf8.get_flag_from_code(intmask(value)) + return self.space.newutf8(utf8, 1, flag) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -200,7 +195,13 @@ # returns a r_uint. If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: + raise self._convert_error("single character", w_ob) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) + if self.size == 2 and ordinal > 0xFFFF: + raise self._convert_error("single character <= 0xFFFF", w_ob) + return r_uint(ordinal) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): @@ -214,15 +215,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - u = wchar_helper.unicode_from_char16(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) else: try: - u = wchar_helper.unicode_from_char32(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + "%s out of range for conversion to unicode: %s", + self.name, hex(e.ordinal)) + return self.space.newutf8(utf8, lgt, flag) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -92,28 +92,20 @@ if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) w_u = space.convert_arg_to_w_unicode(w_ob) - if self.size == 4: + s = w_u._utf8 + if self.ctitem.size == 2: + n = wchar_helper.utf8_size_as_char16(s) + else: n = w_u._len() - else: - if not w_u._has_surrogates(): - n = w_u._len() - else: - n = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " "(got %d characters)", self.name, n) add_final_zero = (n != self.length) if self.ctitem.size == 2: - try: - wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "unicode character ouf of range for " - "conversion to 
char16_t: %s", hex(e.ordinal)) + wchar_helper.utf8_to_char16(s, cdata, n, add_final_zero) else: - wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) + wchar_helper.utf8_to_char32(s, cdata, n, add_final_zero) else: raise self._convert_error("list or tuple", w_ob) @@ -334,8 +326,7 @@ from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) + length = wchar_helper.utf8_size_as_char16(w_u._utf8) else: length = w_u._len() length += 1 diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -0,0 +1,10 @@ +from hypothesis import given, strategies +from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 + + + + at given(strategies.text()) +def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -6,41 +6,6 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = 4 - - -if SIZE_UNICODE == 4: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - return unichr(intmask(ordinal)) -else: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - if ordinal <= 0xffff: - return unichr(intmask(ordinal)) - elif ordinal <= 0x10ffff: - ordinal = intmask(ordinal - 0x10000) - return (unichr(0xD800 | (ordinal >> 10)) + - unichr(0xDC00 | (ordinal & 0x3FF))) - else: - raise OutOfRange(ordinal) - -def is_surrogate(u, index): - return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and - unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF)) - -def as_surrogate(u, index): - ordinal = (ord(u[index + 0]) - 0xD800) << 10 - ordinal |= (ord(u[index + 1]) - 0xDC00) - return r_uint(ordinal + 0x10000) - -def unicode_to_ordinal(u): - if len(u) == 1: - u = ord(u[0]) - return r_uint(u) - elif SIZE_UNICODE == 2: - if len(u) == 2 and is_surrogate(u, 0): - return r_uint(as_surrogate(u, 0)) - raise ValueError - class OutOfRange(Exception): ordinal = 0 @@ -49,59 +14,41 @@ ordinal = intmask(rffi.cast(rffi.INT, ordinal)) self.ordinal = ordinal -def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) +def utf8_from_char32(ptr, length): + # 'ptr' is a pointer to 'length' 32-bit integers + ptr = rffi.cast(rffi.UINTP, ptr) + u = StringBuilder(length) + j = 0 + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + try: + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + except ValueError: + raise OutOfRange(ch) + return u.build(), length, flag - -if SIZE_UNICODE == 2: - def unicode_from_char32(ptr, length): - # 'ptr' is a pointer to 'length' 32-bit integers - ptr = rffi.cast(rffi.UINTP, ptr) - alloc = length - for i in range(length): - if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF: - alloc += 1 - - u = [u'\x00'] * alloc - j = 0 - for i in range(length): - ordinal = rffi.cast(lltype.Unsigned, ptr[i]) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise 
OutOfRange(ordinal) - ordinal = intmask(ordinal - 0x10000) - u[j] = unichr(0xD800 | (ordinal >> 10)) +def utf8_from_char16(ptr, length): + # 'ptr' is a pointer to 'length' 16-bit integers + ptr = rffi.cast(rffi.USHORTP, ptr) + u = StringBuilder(length) + j = 0 + result_length = length + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + if 0xD800 <= ch <= 0xDBFF and j < length: + ch2 = intmask(ptr[j]) + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[j] = unichr(0xDC00 | (ordinal & 0x3FF)) - j += 1 - else: - u[j] = unichr(intmask(ordinal)) - j += 1 - assert j == len(u) - return u''.join(u) - - unicode_from_char16 = _unicode_from_wchar - -else: - unicode_from_char32 = _unicode_from_wchar - - def unicode_from_char16(ptr, length): - # 'ptr' is a pointer to 'length' 16-bit integers - ptr = rffi.cast(rffi.USHORTP, ptr) - u = StringBuilder(length) - i = 0 - j = 0 - while j < length: - ch = intmask(ptr[j]) - j += 1 - if 0xD800 <= ch <= 0xDBFF and j < length: - ch2 = intmask(ptr[j]) - if 0xDC00 <= ch2 <= 0xDFFF: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 - j += 1 - rutf8.unichr_as_utf8_append(u, ch) - i += 1 - return u.build() + result_length -= 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + return u.build(), result_length, flag @specialize.ll() @@ -122,65 +69,44 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u, len): - result = len - i = 0 - while i < len(u): - code = rutf8.codepoint_at_pos(u, i) - if code > 0xFFFF: - result += 1 - i = rutf8.next_codepoint_pos(u, i) +def utf8_size_as_char16(u): + # Counts one per unichar in 'u', or two if they are greater than 0xffff. + TABLE = "\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x02" + result = 0 + for c in u: + result += ord(TABLE[ord(c) >> 4]) return result -def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' wchars; - # we assume here that target_length == len(u). - unichardata = rffi.cast(rffi.CWCHARP, target_ptr) - copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length) +def utf8_to_char32(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; + # we assume (and check) that target_length == number of unichars in utf8. + unichardata = rffi.cast(rffi.UINTP, target_ptr) + i = 0 + for j in range(target_length): + code = rutf8.codepoint_at_pos(utf8, i) + unichardata[j] = rffi.cast(rffi.UINT, code) + i = rutf8.next_codepoint_pos(utf8, i) + assert i == len(utf8) if add_final_zero: - unichardata[target_length] = u'\x00' + unichardata[target_length] = rffi.cast(rffi.UINT, 0) - -if SIZE_UNICODE == 2: - def unicode_to_char32(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; - # we assume here that target_length == unicode_size_as_char32(u). 
- ptr = rffi.cast(rffi.UINTP, target_ptr) - src_index = 0 - last_surrogate_pos = len(u) - 2 - for i in range(target_length): - if src_index <= last_surrogate_pos and is_surrogate(u, src_index): - ordinal = as_surrogate(u, src_index) - src_index += 2 - else: - ordinal = r_uint(ord(u[src_index])) - src_index += 1 - ptr[i] = rffi.cast(rffi.UINT, ordinal) - if add_final_zero: - ptr[target_length] = rffi.cast(rffi.UINT, 0) - - unicode_to_char16 = _unicode_to_wchar - -else: - unicode_to_char32 = _unicode_to_wchar - - def unicode_to_char16(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; - # we assume here that target_length == unicode_size_as_char16(u). - ptr = rffi.cast(rffi.USHORTP, target_ptr) - for uc in u: - ordinal = ord(uc) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal -= 0x10000 - ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) - ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) - ptr = rffi.ptradd(ptr, 2) - else: - ptr[0] = rffi.cast(rffi.USHORT, ordinal) - ptr = rffi.ptradd(ptr, 1) - assert ptr == ( - rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) - if add_final_zero: - ptr[0] = rffi.cast(rffi.USHORT, 0) +def utf8_to_char16(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; + # we assume (and check) that target_length == utf8_size_as_char16(utf8). + ptr = rffi.cast(rffi.USHORTP, target_ptr) + i = 0 + while i < len(utf8): + ordinal = rutf8.codepoint_at_pos(utf8, i) + if ordinal > 0xFFFF: + ordinal -= 0x10000 + ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) + ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) + ptr = rffi.ptradd(ptr, 2) + else: + ptr[0] = rffi.cast(rffi.USHORT, ordinal) + ptr = rffi.ptradd(ptr, 1) + i = rutf8.next_codepoint_pos(utf8, i) + assert ptr == ( + rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) + if add_final_zero: + ptr[0] = rffi.cast(rffi.USHORT, 0) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -453,6 +453,7 @@ ))))) def get_flag_from_code(oc): + assert isinstance(oc, int) if oc <= 0x7F: return FLAG_ASCII if 0xD800 <= oc <= 0xDFFF: From pypy.commits at gmail.com Thu Nov 23 09:50:35 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:50:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Utf8StringBuilder Message-ID: <5a16e03b.cb3a1c0a.79405.30ff@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93138:9ede67aee27e Date: 2017-11-23 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/9ede67aee27e/ Log: Utf8StringBuilder diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -16,9 +16,11 @@ """ import sys -from rpython.rlib.objectmodel import enforceargs, we_are_translated +from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit +from rpython.rlib.signature import signature +from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint from rpython.rlib.unicodedata import unicodedb from rpython.rtyper.lltypesystem import lltype, rffi @@ -316,6 +318,11 @@ return res, flag raise CheckError(~res) +def get_utf8_length_flag(s): + """ Get the length and flag out of valid utf8. 
For now just calls check_utf8 + """ + return check_utf8(s, True) + @jit.elidable def _check_utf8(s, allow_surrogates, start, stop): pos = start @@ -655,6 +662,53 @@ return unicode_escape #, char_escape_helper +class Utf8StringBuilder(object): + def __init__(self, size=0): + self._s = StringBuilder(size) + self._lgt = 0 + self._flag = FLAG_ASCII + + def append(self, s): + # for strings + self._s.append(s) + newlgt, newflag = get_utf8_length_flag(s) + self._lgt += newlgt + self._flag = combine_flags(self._flag, newflag) + + @signature(char(), returns=none()) + def append_char(self, s): + # for characters, ascii + self._lgt += 1 + self._s.append(s) + + def append_code(self, code): + self._flag = combine_flags(self._flag, get_flag_from_code(code)) + self._lgt += 1 + unichr_as_utf8_append(self._s, code, True) + + def build(self): + return self._s.build() + + def get_flag(self): + return self._flag + + def get_length(self): + return self._lgt + +class Utf8StringIterator(object): + def __init__(self, utf8s): + self._utf8 = utf8s + self._end = len(utf8s) + self._pos = 0 + + def done(self): + return self._pos == self._end + + def next(self): + ret = codepoint_at_pos(self._utf8, self._pos) + self._pos = next_codepoint_pos(self._utf8, self._pos) + return ret + def decode_latin_1(s): if len(s) == 0: return s diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -139,3 +139,39 @@ result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected + + at given(strategies.text()) +def test_get_utf8_length_flag(u): + exp_lgt = len(u) + exp_flag = rutf8.FLAG_ASCII + for c in u: + if ord(c) > 0x7F: + exp_flag = rutf8.FLAG_REGULAR + lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) + assert lgt == exp_lgt + assert flag == exp_flag + +def test_utf8_string_builder(): + s = rutf8.Utf8StringBuilder() + s.append("foo") + s.append_char("x") + assert s.get_flag() == rutf8.FLAG_ASCII + assert s.get_length() == 4 + assert s.build() == "foox" + s.append(u"\u1234".encode("utf8")) + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 5 + assert s.build().decode("utf8") == u"foox\u1234" + s.append("foo") + s.append_char("x") + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 9 + assert s.build().decode("utf8") == u"foox\u1234foox" + s = rutf8.Utf8StringBuilder() + s.append_code(0x1234) + assert s.build().decode("utf8") == u"\u1234" + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 1 + s.append_code(0xD800) + assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES + assert s.get_length() == 2 From pypy.commits at gmail.com Thu Nov 23 09:50:37 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:50:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a16e03d.c99edf0a.84d53.98d9@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93139:3e45feebc910 Date: 2017-11-23 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/3e45feebc910/ Log: merge diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -64,13 +64,10 @@ elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_value) - if self.citem.size == 4: + if self.ctitem.size == 2: + 
length = wchar_helper.utf8_size_as_char16(w_u._utf8) + else: length = w_u._len() - else: - if not w_u._has_surrogates(): - length = w_u._len() - else: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -40,16 +40,13 @@ return ord(s[0]) def cast_unicode(self, w_ob): - import pdb - pdb.set_trace() space = self.space w_u = space.convert_arg_to_w_unicode(w_ob) if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", w_u._len(), self.name) - ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) - return intmask(ordinal) + return rutf8.codepoint_at_pos(w_u._utf8, 0) def cast(self, w_ob): from pypy.module._cffi_backend import ctypeptr @@ -175,21 +172,19 @@ return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - if self.is_signed_wchar: - code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) - return self.space.newutf8( - rutf8.unichr_as_utf8(code), 1, - rutf8.get_flag_from_code(code)) - else: - value = misc.read_raw_ulong_data(cdata, self.size) # r_uint - try: - u = wchar_helper.ordinal_to_unicode(value) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, - rutf8.get_flag_from_code(ord(u))) + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + try: + utf8 = rutf8.unichr_as_utf8(value, allow_surrogates=True) + except ValueError: + if self.is_signed_wchar: + s = hex(intmask(value)) + else: + s = hex(value) + raise oefmt(self.space.w_ValueError, + "%s out of range for conversion to unicode: %s", + self.name, s) + flag = rutf8.get_flag_from_code(intmask(value)) + return self.space.newutf8(utf8, 1, flag) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -200,7 +195,13 @@ # returns a r_uint. 
If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: + raise self._convert_error("single character", w_ob) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) + if self.size == 2 and ordinal > 0xFFFF: + raise self._convert_error("single character <= 0xFFFF", w_ob) + return r_uint(ordinal) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): @@ -214,15 +215,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - u = wchar_helper.unicode_from_char16(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) else: try: - u = wchar_helper.unicode_from_char32(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + "%s out of range for conversion to unicode: %s", + self.name, hex(e.ordinal)) + return self.space.newutf8(utf8, lgt, flag) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -92,28 +92,20 @@ if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) w_u = space.convert_arg_to_w_unicode(w_ob) - if self.size == 4: + s = w_u._utf8 + if self.ctitem.size == 2: + n = wchar_helper.utf8_size_as_char16(s) + else: n = w_u._len() - else: - if not w_u._has_surrogates(): - n = w_u._len() - else: - n = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " "(got %d characters)", self.name, n) add_final_zero = (n != self.length) if self.ctitem.size == 2: - try: - wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "unicode character ouf of range for " - "conversion to char16_t: %s", hex(e.ordinal)) + wchar_helper.utf8_to_char16(s, cdata, n, add_final_zero) else: - wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) + wchar_helper.utf8_to_char32(s, cdata, n, add_final_zero) else: raise self._convert_error("list or tuple", w_ob) @@ -334,8 +326,7 @@ from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) + length = wchar_helper.utf8_size_as_char16(w_u._utf8) else: length = w_u._len() length += 1 diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -0,0 +1,10 @@ +from hypothesis import given, strategies +from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 + + + + at given(strategies.text()) +def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/module/_cffi_backend/wchar_helper.py 
b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -6,41 +6,6 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = 4 - - -if SIZE_UNICODE == 4: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - return unichr(intmask(ordinal)) -else: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - if ordinal <= 0xffff: - return unichr(intmask(ordinal)) - elif ordinal <= 0x10ffff: - ordinal = intmask(ordinal - 0x10000) - return (unichr(0xD800 | (ordinal >> 10)) + - unichr(0xDC00 | (ordinal & 0x3FF))) - else: - raise OutOfRange(ordinal) - -def is_surrogate(u, index): - return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and - unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF)) - -def as_surrogate(u, index): - ordinal = (ord(u[index + 0]) - 0xD800) << 10 - ordinal |= (ord(u[index + 1]) - 0xDC00) - return r_uint(ordinal + 0x10000) - -def unicode_to_ordinal(u): - if len(u) == 1: - u = ord(u[0]) - return r_uint(u) - elif SIZE_UNICODE == 2: - if len(u) == 2 and is_surrogate(u, 0): - return r_uint(as_surrogate(u, 0)) - raise ValueError - class OutOfRange(Exception): ordinal = 0 @@ -49,59 +14,41 @@ ordinal = intmask(rffi.cast(rffi.INT, ordinal)) self.ordinal = ordinal -def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) +def utf8_from_char32(ptr, length): + # 'ptr' is a pointer to 'length' 32-bit integers + ptr = rffi.cast(rffi.UINTP, ptr) + u = StringBuilder(length) + j = 0 + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + try: + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + except ValueError: + raise OutOfRange(ch) + return u.build(), length, flag - -if SIZE_UNICODE == 2: - def unicode_from_char32(ptr, length): - # 'ptr' is a pointer to 'length' 32-bit integers - ptr = rffi.cast(rffi.UINTP, ptr) - alloc = length - for i in range(length): - if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF: - alloc += 1 - - u = [u'\x00'] * alloc - j = 0 - for i in range(length): - ordinal = rffi.cast(lltype.Unsigned, ptr[i]) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal = intmask(ordinal - 0x10000) - u[j] = unichr(0xD800 | (ordinal >> 10)) +def utf8_from_char16(ptr, length): + # 'ptr' is a pointer to 'length' 16-bit integers + ptr = rffi.cast(rffi.USHORTP, ptr) + u = StringBuilder(length) + j = 0 + result_length = length + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + if 0xD800 <= ch <= 0xDBFF and j < length: + ch2 = intmask(ptr[j]) + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[j] = unichr(0xDC00 | (ordinal & 0x3FF)) - j += 1 - else: - u[j] = unichr(intmask(ordinal)) - j += 1 - assert j == len(u) - return u''.join(u) - - unicode_from_char16 = _unicode_from_wchar - -else: - unicode_from_char32 = _unicode_from_wchar - - def unicode_from_char16(ptr, length): - # 'ptr' is a pointer to 'length' 16-bit integers - ptr = rffi.cast(rffi.USHORTP, ptr) - u = StringBuilder(length) - i = 0 - j = 0 - while j < length: - ch = intmask(ptr[j]) - j += 1 - if 0xD800 <= ch <= 0xDBFF and j < length: - ch2 = intmask(ptr[j]) - if 0xDC00 <= ch2 <= 0xDFFF: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 - j += 1 - rutf8.unichr_as_utf8_append(u, ch) - i += 1 - return u.build() 
+ result_length -= 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + return u.build(), result_length, flag @specialize.ll() @@ -122,65 +69,44 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u, len): - result = len - i = 0 - while i < len(u): - code = rutf8.codepoint_at_pos(u, i) - if code > 0xFFFF: - result += 1 - i = rutf8.next_codepoint_pos(u, i) +def utf8_size_as_char16(u): + # Counts one per unichar in 'u', or two if they are greater than 0xffff. + TABLE = "\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x02" + result = 0 + for c in u: + result += ord(TABLE[ord(c) >> 4]) return result -def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' wchars; - # we assume here that target_length == len(u). - unichardata = rffi.cast(rffi.CWCHARP, target_ptr) - copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length) +def utf8_to_char32(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; + # we assume (and check) that target_length == number of unichars in utf8. + unichardata = rffi.cast(rffi.UINTP, target_ptr) + i = 0 + for j in range(target_length): + code = rutf8.codepoint_at_pos(utf8, i) + unichardata[j] = rffi.cast(rffi.UINT, code) + i = rutf8.next_codepoint_pos(utf8, i) + assert i == len(utf8) if add_final_zero: - unichardata[target_length] = u'\x00' + unichardata[target_length] = rffi.cast(rffi.UINT, 0) - -if SIZE_UNICODE == 2: - def unicode_to_char32(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; - # we assume here that target_length == unicode_size_as_char32(u). - ptr = rffi.cast(rffi.UINTP, target_ptr) - src_index = 0 - last_surrogate_pos = len(u) - 2 - for i in range(target_length): - if src_index <= last_surrogate_pos and is_surrogate(u, src_index): - ordinal = as_surrogate(u, src_index) - src_index += 2 - else: - ordinal = r_uint(ord(u[src_index])) - src_index += 1 - ptr[i] = rffi.cast(rffi.UINT, ordinal) - if add_final_zero: - ptr[target_length] = rffi.cast(rffi.UINT, 0) - - unicode_to_char16 = _unicode_to_wchar - -else: - unicode_to_char32 = _unicode_to_wchar - - def unicode_to_char16(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; - # we assume here that target_length == unicode_size_as_char16(u). - ptr = rffi.cast(rffi.USHORTP, target_ptr) - for uc in u: - ordinal = ord(uc) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal -= 0x10000 - ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) - ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) - ptr = rffi.ptradd(ptr, 2) - else: - ptr[0] = rffi.cast(rffi.USHORT, ordinal) - ptr = rffi.ptradd(ptr, 1) - assert ptr == ( - rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) - if add_final_zero: - ptr[0] = rffi.cast(rffi.USHORT, 0) +def utf8_to_char16(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; + # we assume (and check) that target_length == utf8_size_as_char16(utf8). 
+ ptr = rffi.cast(rffi.USHORTP, target_ptr) + i = 0 + while i < len(utf8): + ordinal = rutf8.codepoint_at_pos(utf8, i) + if ordinal > 0xFFFF: + ordinal -= 0x10000 + ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) + ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) + ptr = rffi.ptradd(ptr, 2) + else: + ptr[0] = rffi.cast(rffi.USHORT, ordinal) + ptr = rffi.ptradd(ptr, 1) + i = rutf8.next_codepoint_pos(utf8, i) + assert ptr == ( + rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) + if add_final_zero: + ptr[0] = rffi.cast(rffi.USHORT, 0) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -460,6 +460,7 @@ ))))) def get_flag_from_code(oc): + assert isinstance(oc, int) if oc <= 0x7F: return FLAG_ASCII if 0xD800 <= oc <= 0xDFFF: From pypy.commits at gmail.com Thu Nov 23 09:57:40 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:57:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: provide explicit examples Message-ID: <5a16e1e4.a8a0df0a.fd2c9.6e63@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93140:d24fe4f59c96 Date: 2017-11-23 15:57 +0100 http://bitbucket.org/pypy/pypy/changeset/d24fe4f59c96/ Log: provide explicit examples diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -30,6 +30,7 @@ @settings(max_examples=10000) @given(strategies.binary(), strategies.booleans()) + at example('\xf1\x80\x80\x80', False) def test_check_utf8(s, allow_surrogates): _test_check_utf8(s, allow_surrogates) @@ -134,19 +135,23 @@ assert repr(u) == repr_func(u.encode('utf8')) @given(strategies.lists(strategies.characters())) + at example([u'\ud800', u'\udc00']) def test_surrogate_in_utf8(unichars): uni = u''.join(unichars).encode('utf-8') result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected - at given(strategies.text()) -def test_get_utf8_length_flag(u): + at given(strategies.lists(strategies.characters())) +def test_get_utf8_length_flag(unichars): + u = u''.join(unichars) exp_lgt = len(u) exp_flag = rutf8.FLAG_ASCII for c in u: if ord(c) > 0x7F: exp_flag = rutf8.FLAG_REGULAR + if 0xD800 <= ord(c) <= 0xDFFF: + exp_flag = rutf8.FLAG_HAS_SURROGATES lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) assert lgt == exp_lgt assert flag == exp_flag From pypy.commits at gmail.com Thu Nov 23 10:15:50 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:15:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix test on narrow host Message-ID: <5a16e626.54d91c0a.8efdd.759f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93141:eb564d44a7c8 Date: 2017-11-23 16:15 +0100 http://bitbucket.org/pypy/pypy/changeset/eb564d44a7c8/ Log: fix test on narrow host diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -57,12 +57,13 @@ assert ~(length) == e.start else: assert valid - assert length == len(u) if flag == rutf8.FLAG_ASCII: s.decode('ascii') # assert did not raise elif flag == rutf8.FLAG_HAS_SURROGATES: assert allow_surrogates assert _has_surrogates(s) + if sys.maxunicode == 0x10FFFF or not _has_surrogates(s): + assert length == len(u) @given(strategies.characters()) def test_next_pos(uni): From pypy.commits at gmail.com Thu Nov 23 10:18:19 2017 From: 
pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:18:19 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix tests on narrow host Message-ID: <5a16e6bb.9085df0a.341f4.2ea2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93142:fa3bcbe5b09f Date: 2017-11-23 16:17 +0100 http://bitbucket.org/pypy/pypy/changeset/fa3bcbe5b09f/ Log: fix tests on narrow host diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -138,7 +138,7 @@ @given(strategies.lists(strategies.characters())) @example([u'\ud800', u'\udc00']) def test_surrogate_in_utf8(unichars): - uni = u''.join(unichars).encode('utf-8') + uni = ''.join([u.encode('utf8') for u in unichars]) result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected @@ -153,6 +153,7 @@ exp_flag = rutf8.FLAG_REGULAR if 0xD800 <= ord(c) <= 0xDFFF: exp_flag = rutf8.FLAG_HAS_SURROGATES + break lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) assert lgt == exp_lgt assert flag == exp_flag From pypy.commits at gmail.com Thu Nov 23 10:32:44 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:32:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: more tests Message-ID: <5a16ea1c.1cbf1c0a.deee6.077e@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93143:e4a568e4514c Date: 2017-11-23 16:32 +0100 http://bitbucket.org/pypy/pypy/changeset/e4a568e4514c/ Log: more tests diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -154,8 +154,9 @@ if 0xD800 <= ord(c) <= 0xDFFF: exp_flag = rutf8.FLAG_HAS_SURROGATES break - lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) - assert lgt == exp_lgt + lgt, flag = rutf8.get_utf8_length_flag(''.join([c.encode('utf8') for c in u])) + if exp_flag != rutf8.FLAG_HAS_SURROGATES: + assert lgt == exp_lgt assert flag == exp_flag def test_utf8_string_builder(): @@ -182,3 +183,11 @@ s.append_code(0xD800) assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 2 + + at given(strategies.text()) +def test_utf8_iterator(arg): + u = rutf8.Utf8StringIterator(arg.encode('utf8')) + l = [] + while not u.done(): + l.append(unichr(u.next())) + assert list(arg) == l From pypy.commits at gmail.com Thu Nov 23 10:46:47 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:46:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default Message-ID: <5a16ed67.cb3a1c0a.79405.4789@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93144:177352fb8cf4 Date: 2017-11-23 16:46 +0100 http://bitbucket.org/pypy/pypy/changeset/177352fb8cf4/ Log: merge default diff too long, truncating to 2000 out of 7577 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = 
st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + 
suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator 
import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = 
property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except 
ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. 
The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,19 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 
'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. 
-def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if 
capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = 
capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import 
FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - 
self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of 
converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - 
do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
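Further down in this diff the per-object ownership toggle is renamed from _python_owns to __python_owns__ and kept as a writable property, while by-value results from the executor changes above are wrapped with python_owns=True. A small app-level usage sketch, with MyClass standing in for any bound C++ class:

    obj = MyClass()                 # MyClass is a stand-in for any bound class
    if obj.__python_owns__:         # does Python own the underlying C++ object?
        obj.__python_owns__ = False # hand it over to C++; the proxy will no
                                    # longer destruct it when collected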
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): From pypy.commits at gmail.com Thu Nov 23 11:50:37 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 08:50:37 -0800 (PST) Subject: [pypy-commit] pypy default: refactor Message-ID: <5a16fc5d.c97e1c0a.c2665.dece@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93145:ff05ee1c4b6a Date: 2017-11-23 16:48 +0000 http://bitbucket.org/pypy/pypy/changeset/ff05ee1c4b6a/ Log: refactor diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -541,6 +541,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. 
The decoded string @@ -588,6 +592,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -621,23 +638,13 @@ builder = UnicodeBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.newunicode(builder.build()) def readline_w(self, space, w_limit=None): @@ -653,20 +660,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Thu Nov 23 12:31:06 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 09:31:06 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix multibytecodec Message-ID: <5a1705da.2785df0a.e3321.a8b3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93146:99ca8cf9bbc4 Date: 2017-11-23 18:30 +0100 http://bitbucket.org/pypy/pypy/changeset/99ca8cf9bbc4/ Log: fix multibytecodec diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -197,19 +197,21 @@ MBENC_FLUSH = 1 MBENC_RESET = 2 -def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None): +def encode(codec, unicodedata, length, errors="strict", errorcb=None, + namecb=None): encodebuf = pypy_cjk_enc_new(codec) if not encodebuf: raise MemoryError try: - return encodeex(encodebuf, unicodedata, errors, errorcb, namecb) + return encodeex(encodebuf, unicodedata, length, errors, errorcb, namecb) finally: pypy_cjk_enc_free(encodebuf) -def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, +def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None, namecb=None, ignore_error=0): - inleft = len(unicodedata) - with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf: + inleft = length + inbuf = rffi.utf82wcharp(utf8data, length) + try: if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0: raise MemoryError if ignore_error == 0: @@ -221,16 +223,18 @@ if r == 0 or r == ignore_error: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) while flags & MBENC_RESET: r = pypy_cjk_enc_reset(encodebuf) if r == 0: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) src = pypy_cjk_enc_outbuf(encodebuf) length = pypy_cjk_enc_outlen(encodebuf) return rffi.charpsize2str(src, length) + finally: + lltype.free(inbuf, flavor='raw') def multibytecodec_encerror(encodebuf, e, errors, errorcb, namecb, unicodedata): @@ -256,21 +260,16 @@ elif errors == "replace": codec = pypy_cjk_enc_getcodec(encodebuf) 
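For reference, the c_codecs encode()/encodeex() helpers change signature in this patch: they now take the text as utf8 bytes plus an explicit codepoint count (the buffer is converted with rffi.utf82wcharp), so the length argument counts characters rather than bytes. A usage sketch lifted from the updated tests further down:

    from pypy.module._multibytecodec.c_codecs import getcodec, encode
    c = getcodec("hz")
    assert encode(c, "foobar", 6) == "foobar"
    assert encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) == '~{abc}~}'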
try: - replace = encode(codec, u"?") + replace = encode(codec, "?", 1) except EncodeDecodeError: replace = "?" else: assert errorcb - XXX - retu, rets, end = errorcb(errors, namecb, reason, - unicodedata.encode("utf8"), start, end) - if rets is not None: - # py3k only - replace = rets - else: - assert retu is not None - codec = pypy_cjk_enc_getcodec(encodebuf) - replace = encode(codec, retu, "strict", errorcb, namecb) + rets, end = errorcb(errors, namecb, reason, + unicodedata, start, end) + codec = pypy_cjk_enc_getcodec(encodebuf) + lgt, _ = rutf8.get_utf8_length_flag(rets) + replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) if r == MBERR_NOMEMORY: diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype +from rpython.rlib import rutf8 from pypy.module._multibytecodec import c_codecs from pypy.module._multibytecodec.interp_multibytecodec import ( MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror, @@ -65,7 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - return space.newunicode(output) + lgt, flag = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt, flag) @unwrap_spec(errors="text_or_none") @@ -88,7 +90,8 @@ def _initialize(self): self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec) - self.pending = u"" + self.pending = "" + self.pending_len = 0 def _free(self): self.pending = None @@ -96,25 +99,37 @@ c_codecs.pypy_cjk_enc_free(self.encodebuf) self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO) - @unwrap_spec(object='utf8', final=bool) - def encode_w(self, object, final=False): - u_object = object.decode('utf8') + @unwrap_spec(final=bool) + def encode_w(self, space, w_object, final=False): + utf8data, length = space.utf8_len_w(w_object) space = self.space state = space.fromcache(CodecState) if len(self.pending) > 0: - u_object = self.pending + u_object + utf8data = self.pending + utf8data + length += self.pending_len try: - output = c_codecs.encodeex(self.encodebuf, u_object, self.errors, + output = c_codecs.encodeex(self.encodebuf, utf8data, length, + self.errors, state.encode_error_handler, self.name, get_ignore_error(final)) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, object, len(u_object), + raise wrap_unicodeencodeerror(space, e, utf8data, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf) - assert 0 <= pos <= len(u_object) - self.pending = u_object[pos:] + assert 0 <= pos <= length + # scan the utf8 string until we hit pos + i = 0 + stop = length - pos + self.pending_len = stop + if stop > 0: + while pos > 0: + i = rutf8.next_codepoint_pos(utf8data, i) + pos -= 1 + self.pending = utf8data[i:] + else: + self.pending = "" return space.newbytes(output) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -31,23 +31,23 @@ return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) - 
@unwrap_spec(input='utf8', errors="text_or_none") - def encode(self, space, input, errors=None): + @unwrap_spec(errors="text_or_none") + def encode(self, space, w_input, errors=None): if errors is None: errors = 'strict' state = space.fromcache(CodecState) + input, length = space.utf8_len_w(w_input) # - u_input = input.decode('utf8') try: - output = c_codecs.encode(self.codec, u_input, errors, + output = c_codecs.encode(self.codec, input, length, errors, state.encode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, input, len(u_input), + raise wrap_unicodeencodeerror(space, e, input, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) return space.newtuple([space.newbytes(output), - space.newint(len(u_input))]) + space.newint(length)]) MultibyteCodec.typedef = TypeDef( diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -14,27 +14,27 @@ def test_decode_gbk(): c = getcodec("gbk") u = decode(c, "\xA1\xAA") - assert u == unichr(0x2014) + assert u == unichr(0x2014).encode('utf8') u = decode(c, "foobar") - assert u == u"foobar" + assert u == "foobar" def test_decode_hz(): # stateful c = getcodec("hz") u = decode(c, "~{abc}") - assert u == u'\u5f95\u6cef' + assert u == u'\u5f95\u6cef'.encode('utf8') u = decode(c, "~{") - assert u == u'' + assert u == '' def test_decodeex_hz(): c = getcodec("hz") decodebuf = c_codecs.pypy_cjk_dec_new(c) u = c_codecs.decodeex(decodebuf, "~{abcd~}") - assert u == u'\u5f95\u6c85' + assert u == u'\u5f95\u6c85'.encode('utf8') u = c_codecs.decodeex(decodebuf, "~{efgh~}") - assert u == u'\u5f50\u73b7' + assert u == u'\u5f50\u73b7'.encode('utf8') u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh") - assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7' + assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'.encode('utf8') c_codecs.pypy_cjk_dec_free(decodebuf) def test_decodeex_hz_incomplete(): @@ -64,7 +64,7 @@ buf += c u = c_codecs.decodeex(decodebuf, buf, ignore_error = c_codecs.MBERR_TOOFEW) - assert u == output + assert u == output.encode('utf8') incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf) buf = buf[incompletepos:] assert buf == '' @@ -86,46 +86,47 @@ def test_decode_hz_ignore(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'ignore') - assert u == u'def\u5fcf' + assert u == u'def\u5fcf'.encode('utf8') def test_decode_hz_replace(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'replace') - assert u == u'def\ufffd\u5fcf' + assert u == u'def\ufffd\u5fcf'.encode('utf8') def test_encode_hz(): c = getcodec("hz") - s = encode(c, u'foobar') + s = encode(c, u'foobar'.encode('utf8'), 6) assert s == 'foobar' and type(s) is str - s = encode(c, u'\u5f95\u6cef') + s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) assert s == '~{abc}~}' def test_encode_hz_error(): # error c = getcodec("hz") - e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value + e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def'.encode('utf8'), 7).value assert e.start == 3 assert e.end == 4 assert e.reason == "illegal multibyte sequence" def test_encode_hz_ignore(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'ignore') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'ignore') assert s == 'abcdef' def test_encode_hz_replace(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'replace') + s = encode(c, 
u'abc\u1234def'.encode('utf8'), 7, 'replace') assert s == 'abc?def' def test_encode_jisx0208(): c = getcodec('iso2022_jp') - s = encode(c, u'\u83ca\u5730\u6642\u592b') + s = encode(c, u'\u83ca\u5730\u6642\u592b'.encode('utf8'), 4) assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str def test_encode_custom_error_handler_bytes(): + py.test.skip("needs revamping in py3k") c = getcodec("hz") def errorhandler(errors, enc, msg, t, startingpos, endingpos): - return None, '\xc3', endingpos - s = encode(c, u'abc\u1234def', 'foo', errorhandler) + return u'\xc3'.encode('utf8'), endingpos + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler) assert '\xc3' in s diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -1,6 +1,7 @@ from pypy.module._multibytecodec import c_codecs from rpython.translator.c.test import test_standalone from rpython.config.translationoption import get_combined_translation_config +from rpython.rlib import rutf8 class TestTranslation(test_standalone.StandaloneTests): @@ -13,7 +14,8 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - r = c_codecs.encode(c, u) + lgt, _ = rutf8.get_utf8_length_flag(u) + r = c_codecs.encode(c, u, lgt) print r return 0 # From pypy.commits at gmail.com Thu Nov 23 12:55:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 09:55:56 -0800 (PST) Subject: [pypy-commit] pypy default: Simplify _find_line_ending() and fix logic in the case of embedded \r and self.readnl=='\r\n' Message-ID: <5a170bac.8fb1df0a.eb254.5f3f@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93147:8369cd92f7d0 Date: 2017-11-23 17:52 +0000 http://bitbucket.org/pypy/pypy/changeset/8369cd92f7d0/ Log: Simplify _find_line_ending() and fix logic in the case of embedded \r and self.readnl=='\r\n' diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -216,14 +216,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -242,16 +235,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' else: # Non-universal mode. 
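The hunk below is the fix named in the log message: in non-universal mode the old code fell back to searching for just readnl[0], so with readnl == '\r\n' an embedded lone '\r' was treated as the possible start of a line ending. The replacement scans for the complete readnl sequence and only leaves the last len(readnl)-1 positions unscanned, since those may still hold a split ending. A standalone sketch of the same scan, for illustration only:

    def find_newline(line, start, end, readnl):
        # mirrors the new non-universal branch: (pos-after-ending, 0) on a
        # match, (-1, end_scan) when no complete readnl was found
        end_scan = end - len(readnl) + 1
        for i in range(start, end_scan):
            if line[i] == readnl[0] and line[i:i + len(readnl)] == readnl:
                return i + len(readnl), 0
        return -1, end_scan

    find_newline(u'a\rb!', 0, 4, u'\r\n')    # -> (-1, 3): the lone '\r' is skipped
    find_newline(u'a\rb\r\n', 0, 5, u'\r\n') # -> (5, 0)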
- pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( From pypy.commits at gmail.com Thu Nov 23 13:02:57 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 10:02:57 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: one part of interp_sre Message-ID: <5a170d51.e1acdf0a.beeec.9bf2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93148:5a057586add0 Date: 2017-11-23 19:02 +0100 http://bitbucket.org/pypy/pypy/changeset/5a057586add0/ Log: one part of interp_sre diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -7,7 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # @@ -237,8 +238,8 @@ filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.unicode_w(w_ptemplate) - literal = u'\\' not in filter_as_unicode + filter_as_unicode = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: @@ -267,7 +268,7 @@ sublist_w = strbuilder = unicodebuilder = None if use_builder: if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) + unicodebuilder = Utf8StringBuilder(ctx.end) else: assert filter_as_string is not None strbuilder = StringBuilder(ctx.end) @@ -335,7 +336,9 @@ return space.newbytes(strbuilder.build()), n else: assert unicodebuilder is not None - return space.newunicode(unicodebuilder.build()), n + return space.newutf8(unicodebuilder.build(), + unicodebuilder.get_length(), + unicodebuilder.get_flag()), n else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newunicode(u'') From pypy.commits at gmail.com Thu Nov 23 13:11:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 10:11:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a170f6f.cc5e1c0a.87e67.c5b7@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93149:0797bb6394b6 Date: 2017-11-23 18:07 +0000 http://bitbucket.org/pypy/pypy/changeset/0797bb6394b6/ Log: hg merge default diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -223,14 +223,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. 
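A side note on the utf8 plumbing this branch relies on (visible in the multibytecodec hunks above and the sre hunk below): unicode results are carried as utf8 byte strings and handed to the object space together with their codepoint length and a flag, either computed explicitly with rutf8.get_utf8_length_flag() or tracked by Utf8StringBuilder. Roughly:

    from rpython.rlib import rutf8
    utf8 = u'\u5f95\u6cef'.encode('utf8')
    lgt, flag = rutf8.get_utf8_length_flag(utf8)  # codepoint count + ascii/utf8 flag
    w_str = space.newutf8(utf8, lgt, flag)        # 'space' is the interp-level
                                                  # object space, as in the diffs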
Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -249,16 +242,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = '\n' else: # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -548,6 +547,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string @@ -595,6 +598,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -628,23 +644,13 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): @@ -660,20 +666,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Thu Nov 23 15:05:45 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:05:45 -0800 (PST) Subject: [pypy-commit] pypy default: fix test use of eci for vmprof_start_sampling, vmprof_start_sampling Message-ID: <5a172a19.03da1c0a.d5476.8ad6@mx.google.com> Author: Matti Picus Branch: Changeset: r93151:72001f56a97f Date: 2017-11-23 20:28 +0200 http://bitbucket.org/pypy/pypy/changeset/72001f56a97f/ Log: fix test use of eci for vmprof_start_sampling, vmprof_start_sampling diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -133,11 +134,17 @@ #endif """]) +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + 
_eci = auto_eci + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=auto_eci, + rffi.INT, compilation_info=_eci, _nowrapper=True) vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=auto_eci, + lltype.Void, compilation_info=_eci, _nowrapper=True) From pypy.commits at gmail.com Thu Nov 23 15:05:42 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:05:42 -0800 (PST) Subject: [pypy-commit] pypy default: cannot pip install vmprof on arm, s390x Message-ID: <5a172a16.21b9df0a.dcef.f631@mx.google.com> Author: Matti Picus Branch: Changeset: r93150:8c42f0f755c0 Date: 2017-11-23 18:48 +0200 http://bitbucket.org/pypy/pypy/changeset/8c42f0f755c0/ Log: cannot pip install vmprof on arm, s390x diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis From pypy.commits at gmail.com Thu Nov 23 15:08:55 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:08:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5a172ad7.d58bdf0a.8cc33.c5c9@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93152:ce6402cbdf3c Date: 2017-11-23 22:08 +0200 http://bitbucket.org/pypy/pypy/changeset/ce6402cbdf3c/ Log: merge default into py3.5 diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -216,44 +216,41 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) - if pos >= 0: - return pos - start + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' else: # Non-universal mode. 
- pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos - start + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -549,8 +546,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.unicode_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -574,6 +576,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string @@ -616,8 +622,7 @@ eof = input_buf.getlength() == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -629,6 +634,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -662,23 +680,13 @@ builder = UnicodeBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.newunicode(builder.build()) def readline_w(self, space, w_limit=None): @@ -687,28 +695,16 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = UnicodeBuilder() while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._set_decoded_chars(None) - self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -725,8 +721,8 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -744,15 +740,15 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) 
+ # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -761,18 +757,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return space.newunicode(result) # _____________________________________________________________ # write methods @@ -913,7 +903,7 @@ self._unsupportedoperation( space, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -943,7 +933,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -964,8 +954,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -1034,7 +1023,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,6 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -41,13 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,8 +56,10 @@ return None def stop_sampling(): - fd = _get_vmprof().cintf.vmprof_stop_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling + fd = vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) def start_sampling(): - _get_vmprof().cintf.vmprof_start_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_start_sampling + vmprof_start_sampling() diff --git 
a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -40,7 +41,7 @@ compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_LINUX'] elif sys.platform == 'win32': - compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] + compile_extra += ['-DVMPROF_WINDOWS'] separate_module_files = [SHARED.join('vmprof_win.c')] _libs = [] else: @@ -120,16 +121,32 @@ vmprof_get_profile_path = rffi.llexternal("vmprof_get_profile_path", [rffi.CCHARP, lltype.Signed], lltype.Signed, compilation_info=eci, _nowrapper=True) - vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=eci, - _nowrapper=True) - vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=eci, - _nowrapper=True) return CInterface(locals()) +# this is always present, but compiles to no-op if RPYTHON_VMPROF is not +# defined (i.e. if we don't actually use vmprof in the generated C) +auto_eci = ExternalCompilationInfo(post_include_bits=[""" +#ifndef RPYTHON_VMPROF +# define vmprof_stop_sampling() (-1) +# define vmprof_start_sampling() ((void)0) +#endif +"""]) + +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + _eci = auto_eci + +vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=_eci, + _nowrapper=True) +vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=_eci, + _nowrapper=True) + class CInterface(object): def __init__(self, namespace): @@ -218,6 +235,7 @@ # stacklet support def save_rvmprof_stack(): + vmprof_stop_sampling() return vmprof_tl_stack.get_or_make_raw() def empty_rvmprof_stack(): @@ -225,6 +243,7 @@ def restore_rvmprof_stack(x): vmprof_tl_stack.setraw(x) + vmprof_start_sampling() # # traceback support diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -12,6 +12,7 @@ #endif +#include "vmprof_common.h" #include "shared/vmprof_get_custom_offset.h" #ifdef VMPROF_UNIX @@ -30,7 +31,7 @@ } #endif -long vmprof_get_profile_path(const char * buffer, long size) +long vmprof_get_profile_path(char * buffer, long size) { return vmp_fd_to_path(vmp_profile_fileno(), buffer, size); } diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -36,8 +36,8 @@ RPY_EXTERN int vmprof_stack_append(void*, long); RPY_EXTERN long vmprof_stack_pop(void*); RPY_EXTERN void vmprof_stack_free(void*); -RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, intptr_t*, intptr_t); -RPY_EXTERN long vmprof_get_profile_path(const char *, long); +RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, void**, intptr_t); +RPY_EXTERN long vmprof_get_profile_path(char *, long); RPY_EXTERN int vmprof_stop_sampling(void); RPY_EXTERN void vmprof_start_sampling(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ 
b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -262,7 +262,7 @@ } int depth = 0; - PY_STACK_FRAME_T * top_most_frame = frame; + //PY_STACK_FRAME_T * top_most_frame = frame; while ((depth + _per_loop()) <= max_depth) { unw_get_proc_info(&cursor, &pip); @@ -400,7 +400,7 @@ if (fd == NULL) { return 0; } - char * saveptr; + char * saveptr = NULL; char * line = NULL; char * he = NULL; char * name; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -4,6 +4,9 @@ #include #ifdef RPYTHON_VMPROF + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc); + #ifdef RPYTHON_LL2CTYPES /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */ @@ -193,7 +196,7 @@ #endif intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length) + void **result_p, intptr_t result_length) { int n; int enabled; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -96,7 +96,7 @@ #endif RPY_EXTERN intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length); + void **result_p, intptr_t result_length); #endif int vmprof_get_signal_type(void); diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,8 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + eci = ExternalCompilationInfo(compile_extra=['-g','-O0', '-Werror'], + post_include_bits = ['int native_func(int);'], separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Fri Nov 24 02:18:36 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 23:18:36 -0800 (PST) Subject: [pypy-commit] pypy default: generate conf.h for tests Message-ID: <5a17c7cc.52bf1c0a.cd6cb.ec74@mx.google.com> Author: Matti Picus Branch: Changeset: r93153:d7c94a4970dd Date: 2017-11-24 09:16 +0200 http://bitbucket.org/pypy/pypy/changeset/d7c94a4970dd/ Log: generate conf.h for tests diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() From pypy.commits at gmail.com Fri Nov 24 02:18:38 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 23:18:38 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5a17c7ce.07d81c0a.b6ab9.f4a1@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93154:d2807ddb8178 Date: 2017-11-24 09:17 +0200 http://bitbucket.org/pypy/pypy/changeset/d2807ddb8178/ Log: merge default into py3.5 diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if 
sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() From pypy.commits at gmail.com Fri Nov 24 04:04:41 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:04:41 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start working on pypyjson Message-ID: <5a17e0a9.06b7df0a.2eba1.5b54@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93155:109fd5f5d4eb Date: 2017-11-23 20:52 +0100 http://bitbucket.org/pypy/pypy/changeset/109fd5f5d4eb/ Log: start working on pypyjson diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,10 +1760,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - def unicode_w(self, w_obj): - # XXX: kill me! - return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -1,7 +1,7 @@ import sys from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize, always_inline, r_dict -from rpython.rlib import rfloat, runicode +from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -19,29 +19,6 @@ return 0.0 return x * NEG_POW_10[exp] -def strslice2unicode_latin1(s, start, end): - """ - Convert s[start:end] to unicode. s is supposed to be an RPython string - encoded in latin-1, which means that the numeric value of each char is the - same as the corresponding unicode code point. - - Internally it's implemented at the level of low-level helpers, to avoid - the extra copy we would need if we take the actual slice first. - - No bound checking is done, use carefully. 
- """ - from rpython.rtyper.annlowlevel import llstr, hlunicode - from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE - from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar - length = end-start - ll_s = llstr(s) - ll_res = malloc(UNICODE, length) - ll_res.hash = 0 - for i in range(length): - ch = ll_s.chars[start+i] - ll_res.chars[i] = cast_primitive(UniChar, ch) - return hlunicode(ll_res) - def slice_eq(a, b): (ll_chars1, start1, length1, _) = a (ll_chars2, start2, length2, _) = b @@ -312,8 +289,7 @@ bits |= ord(ch) if ch == '"': self.pos = i - return self.space.newunicode( - self._create_string(start, i - 1, bits)) + return self._create_string(start, i - 1, bits) elif ch == '\\' or ch < '\x20': self.pos = i-1 return self.decode_string_escaped(start) @@ -322,12 +298,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - return unicodehelper.decode_utf8(self.space, content_utf8) + lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) + return self.space.newutf8(content_utf8, lgt, flag) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) - return strslice2unicode_latin1(self.s, start, end) + return self.space.newutf8(self.getslice(start, end), + end - start, rutf8.FLAG_ASCII) def decode_string_escaped(self, start): i = self.pos @@ -340,9 +319,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + lgt, f = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) self.pos = i - return self.space.newunicode(content_unicode) + return self.space.newutf8(content_utf8, lgt, f) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -10,10 +10,14 @@ assert dec.skip_whitespace(8) == len(s) dec.close() +class FakeSpace(object): + def newutf8(self, s, l, f): + return s + def test_decode_key(): s1 = "123" * 100 s = ' "%s" "%s" ' % (s1, s1) - dec = JSONDecoder('fake space', s) + dec = JSONDecoder(FakeSpace(), s) assert dec.pos == 0 x = dec.decode_key(0) assert x == s1 diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,23 +367,10 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) - def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Fri Nov 24 04:04:43 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:04:43 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a17e0ab.8dc1df0a.ef7c5.6dd7@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93156:8fac293591e9 Date: 2017-11-24 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/8fac293591e9/ Log: merge diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -223,14 +223,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -249,16 +242,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = '\n' else: # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -548,6 +547,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. 
The decoded string @@ -595,6 +598,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -628,23 +644,13 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): @@ -660,20 +666,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Fri Nov 24 04:53:49 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:53:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix _ssl module Message-ID: <5a17ec2d.dc361c0a.1637d.6dbf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93157:8a24f68050df Date: 2017-11-24 10:53 +0100 http://bitbucket.org/pypy/pypy/changeset/8a24f68050df/ Log: fix _ssl module diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -1566,12 +1566,13 @@ cadata = space.bufferstr_w(w_cadata) else: ca_file_type = SSL_FILETYPE_PEM - try: - cadata = space.unicode_w(w_cadata).encode('ascii') - except UnicodeEncodeError: + w_uni = space.convert_arg_to_w_unicode(w_cadata) + if not w_uni.is_ascii(): raise oefmt(space.w_TypeError, "cadata should be a ASCII string or a " "bytes-like object") + cadata = space.utf8_w(w_uni) + if cafile is None and capath is None and cadata is None: raise oefmt(space.w_TypeError, "cafile and capath cannot be both omitted") From pypy.commits at gmail.com Fri Nov 24 05:16:59 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 02:16:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start fixing _rawffi Message-ID: <5a17f19b.f3c4df0a.91aaa.bb7f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93158:467a32f09dd6 Date: 2017-11-24 11:16 +0100 http://bitbucket.org/pypy/pypy/changeset/467a32f09dd6/ Log: start fixing _rawffi diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -167,8 +167,8 @@ addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): - buf = rffi.unicode2wcharp(unicodeval) + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): + buf = rffi.utf82wcharp(utf8val, utf8len) self.w_func.to_free.append(rffi.cast(rffi.VOIDP, buf)) addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) diff --git a/pypy/module/_rawffi/alt/test/test_type_converter.py 
b/pypy/module/_rawffi/alt/test/test_type_converter.py --- a/pypy/module/_rawffi/alt/test/test_type_converter.py +++ b/pypy/module/_rawffi/alt/test/test_type_converter.py @@ -6,7 +6,7 @@ class DummyFromAppLevelConverter(FromAppLevelConverter): - def handle_all(self, w_ffitype, w_obj, val): + def handle_all(self, w_ffitype, w_obj, val, lgt=None): self.lastval = val handle_signed = handle_all @@ -120,8 +120,8 @@ def test_strings(self): # first, try automatic conversion from applevel self.check(app_types.char_p, self.space.newbytes('foo'), 'foo') - self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234') - self.check(app_types.unichar_p, self.space.wrap('foo'), u'foo') + self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234'.encode('utf8')) + self.check(app_types.unichar_p, self.space.wrap('foo'), 'foo') # then, try to pass explicit pointers self.check(app_types.char_p, self.space.wrap(42), 42) self.check(app_types.unichar_p, self.space.wrap(42), 42) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -1,6 +1,6 @@ from rpython.rlib import libffi -from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import jit, rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.error import oefmt from pypy.module._rawffi.structure import W_StructureInstance, W_Structure from pypy.module._rawffi.alt.interp_ffitype import app_types @@ -85,8 +85,8 @@ return True elif w_ffitype.is_unichar_p() and (w_type is self.space.w_bytes or w_type is self.space.w_unicode): - unicodeval = self.space.unicode_w(w_obj) - self.handle_unichar_p(w_ffitype, w_obj, unicodeval) + utf8, lgt = self.space.utf8_len_w(w_obj) + self.handle_unichar_p(w_ffitype, w_obj, utf8, lgt) return True return False @@ -147,7 +147,7 @@ """ self.error(w_ffitype, w_obj) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): """ unicodeval: interp-level unicode """ @@ -228,7 +228,8 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newunicode(unichr(wcharval)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, + rutf8.get_flag_from_code(intmask(wcharval))) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -10,6 +10,7 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rffi_platform from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize import rpython.rlib.rposix as rposix @@ -416,13 +417,13 @@ val = s[0] push_func(add_arg, argdesc, val) elif letter == 'u': - s = space.unicode_w(w_arg) - if len(s) != 1: + s, lgt = space.utf8_len_w(w_arg) + if lgt != 1: raise oefmt(space.w_TypeError, "Expected unicode string of length one as wide " "character") - val = s[0] - push_func(add_arg, argdesc, val) + val = rutf8.codepoint_at_pos(s, 0) + push_func(add_arg, argdesc, rffi.cast(rffi.WCHAR_T, val)) else: for c in unroll_letters_for_numbers: if letter == c: diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- 
a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1024,13 +1024,14 @@ def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 - w = lltype.malloc(CWCHARP.TO, utf8len, flavor='raw') + w = lltype.malloc(CWCHARP.TO, utf8len + 1, flavor='raw') i = 0 index = 0 while i < len(utf8): w[index] = unichr(rutf8.codepoint_at_pos(utf8, i)) i = rutf8.next_codepoint_pos(utf8, i) index += 1 + w[index] = unichr(0) return w # char** From pypy.commits at gmail.com Fri Nov 24 06:50:51 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 24 Nov 2017 03:50:51 -0800 (PST) Subject: [pypy-commit] buildbot default: not needed, virtualenv is deleted by "hg purge" Message-ID: <5a18079b.42e61c0a.73c2e.9727@mx.google.com> Author: Matti Picus Branch: Changeset: r1042:0a18cb374a4e Date: 2017-11-24 13:49 +0200 http://bitbucket.org/pypy/buildbot/changeset/0a18cb374a4e/ Log: not needed, virtualenv is deleted by "hg purge" diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -563,14 +563,8 @@ if platform == 'win32': self.virt_python = r'virt_test\Scripts\python.exe' - clean = 'rmdir /s /q virt-test' else: self.virt_python = 'virt_test/bin/python' - clean = 'rm -rf virt-test' - self.addStep(ShellCmd( - description="clean old virtualenv", - command=clean, - haltOnFailure=False)) self.addStep(ShellCmd( description="create virtualenv for tests", command=['virtualenv', 'virt_test'], From pypy.commits at gmail.com Fri Nov 24 06:50:53 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 24 Nov 2017 03:50:53 -0800 (PST) Subject: [pypy-commit] buildbot default: update pip, setuptools Message-ID: <5a18079d.8dc1df0a.ef7c5.7337@mx.google.com> Author: Matti Picus Branch: Changeset: r1043:0548ff25f980 Date: 2017-11-24 13:50 +0200 http://bitbucket.org/pypy/buildbot/changeset/0548ff25f980/ Log: update pip, setuptools diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -572,7 +572,14 @@ )) self.addStep(ShellCmd( - description="install requirments to virtual environment", + description="update pip", + command=[self.virt_python, '-mpip', 'install', '--upgrade', + 'pip' , 'setuptools'], + haltOnFailure=True, + )) + + self.addStep(ShellCmd( + description="install requirements to virtual environment", command=[self.virt_python, '-mpip', 'install', '-r', 'requirements.txt'], haltOnFailure=True, From pypy.commits at gmail.com Fri Nov 24 08:00:47 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:00:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix more tests Message-ID: <5a1817ff.c23a1c0a.d3e0.7191@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93160:a9bb96fbf9d4 Date: 2017-11-24 13:53 +0100 http://bitbucket.org/pypy/pypy/changeset/a9bb96fbf9d4/ Log: fix more tests BUT: a slight pessimization, because object decoding becomes a little bit slower diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -247,10 +247,11 @@ self.pos = i+1 return self.space.newdict() - d = {} + # XXX this should be improved to use an unwrapped dict + w_dict = self.space.newdict() while True: # parse a key: value - name = self.decode_key(i) + w_name = self.decode_key(i) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] if ch != ':': @@ -259,13 
+260,13 @@ i = self.skip_whitespace(i) # w_value = self.decode_any(i) - d[name] = w_value + self.space.setitem(w_dict, w_name, w_value) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] i += 1 if ch == '}': self.pos = i - return self._create_dict(d) + return w_dict elif ch == ',': pass elif ch == '\0': @@ -274,10 +275,6 @@ self._raise("Unexpected '%s' when decoding object (char %d)", ch, i-1) - def _create_dict(self, d): - from pypy.objspace.std.dictmultiobject import from_unicode_key_dict - return from_unicode_key_dict(self.space, d) - def decode_string(self, i): start = i bits = 0 @@ -383,7 +380,7 @@ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00)) def decode_key(self, i): - """ returns an unwrapped unicode """ + """ returns a wrapped unicode """ from rpython.rlib.rarithmetic import intmask i = self.skip_whitespace(i) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1257,12 +1257,6 @@ create_iterator_classes(UnicodeDictStrategy) -def from_unicode_key_dict(space, d): - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.erase(d) - return W_DictObject(space, strategy, storage) - - class IntDictStrategy(AbstractTypedStrategy, DictStrategy): erase, unerase = rerased.new_erasing_pair("int") erase = staticmethod(erase) From pypy.commits at gmail.com Fri Nov 24 08:00:45 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:00:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix unicode \-encoding in _pypyjson Message-ID: <5a1817fd.d7941c0a.d4557.3084@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93159:82223a975b6b Date: 2017-11-24 13:00 +0100 http://bitbucket.org/pypy/pypy/changeset/82223a975b6b/ Log: fix unicode \-encoding in _pypyjson diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -369,8 +369,7 @@ return # help the annotator to know that we'll never go beyond # this point # - uchr = runicode.code_to_unichr(val) # may be a surrogate pair again - utf8_ch = unicodehelper.encode_utf8(self.space, uchr) + utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) builder.append(utf8_ch) return i From pypy.commits at gmail.com Fri Nov 24 08:27:58 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:27:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: add todo Message-ID: <5a181e5e.caa2df0a.c6316.e3d8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93161:8dac9e38c3d5 Date: 2017-11-24 14:27 +0100 http://bitbucket.org/pypy/pypy/changeset/8dac9e38c3d5/ Log: add todo diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -9,3 +9,5 @@ * remove assertions from W_UnicodeObject.__init__ if all the builders pass * what to do with error handlers that go backwards. 
There were tests in test_codecs that would check for that + +* fix _pypyjson to not use a wrapped dict when decoding an object From pypy.commits at gmail.com Fri Nov 24 09:15:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 06:15:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix encoding to operate on utf-8 encoded strings Message-ID: <5a18298a.e1acdf0a.beeec.a3b8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93163:5b81f483c459 Date: 2017-11-24 15:14 +0100 http://bitbucket.org/pypy/pypy/changeset/5b81f483c459/ Log: fix encoding to operate on utf-8 encoded strings diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,5 +1,5 @@ from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import str_decode_utf_8 +from rpython.rlib import rutf8 from pypy.interpreter import unicodehelper @@ -30,11 +30,8 @@ # the input is a string with only non-special ascii chars return w_string - eh = unicodehelper.decode_error_handler(space) - u = str_decode_utf_8( - s, len(s), None, final=True, errorhandler=eh, - allow_surrogates=True)[0] - sb = StringBuilder(len(u)) + unicodehelper.check_utf8_or_raise(space, s) + sb = StringBuilder(len(s)) sb.append_slice(s, 0, first) else: # We used to check if 'u' contains only safe characters, and return @@ -44,29 +41,31 @@ # a string (with the ascii encoding). This requires two passes # over the characters. So we may as well directly turn it into a # string here --- only one pass. - u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) + s = space.utf8_w(w_string) + sb = StringBuilder(len(s)) first = 0 - for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + it = rutf8.Utf8StringIterator(s) + for i in range(first): + it.next() + for c in it: + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) From pypy.commits at gmail.com Fri Nov 24 09:15:35 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 06:15:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: use an actual iterator, to make the code nicer (they work well in rpython nowadays) Message-ID: <5a182987.1cbf1c0a.deee6.0ee3@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93162:6a13aba253bd Date: 2017-11-24 15:07 +0100 http://bitbucket.org/pypy/pypy/changeset/6a13aba253bd/ Log: use an actual iterator, to make the code nicer (they work well in rpython nowadays) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -702,10 +702,12 @@ self._end = len(utf8s) self._pos = 0 - def done(self): - return self._pos == self._end + def __iter__(self): + return self def next(self): + if self._pos 
== self._end: + raise StopIteration ret = codepoint_at_pos(self._utf8, self._pos) self._pos = next_codepoint_pos(self._utf8, self._pos) return ret diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -188,6 +188,6 @@ def test_utf8_iterator(arg): u = rutf8.Utf8StringIterator(arg.encode('utf8')) l = [] - while not u.done(): - l.append(unichr(u.next())) + for c in u: + l.append(unichr(c)) assert list(arg) == l From pypy.commits at gmail.com Fri Nov 24 10:13:23 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:13:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: support for append_utf8 Message-ID: <5a183713.c78c1c0a.ebbca.0ce7@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93164:f5be33826726 Date: 2017-11-24 16:10 +0100 http://bitbucket.org/pypy/pypy/changeset/f5be33826726/ Log: support for append_utf8 diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -687,6 +687,11 @@ self._lgt += 1 unichr_as_utf8_append(self._s, code, True) + def append_utf8(self, utf8, length, flag): + self._flag = combine_flags(self._flag, flag) + self._lgt += length + self._s.append(utf8) + def build(self): return self._s.build() diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -175,6 +175,7 @@ assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 9 assert s.build().decode("utf8") == u"foox\u1234foox" + s = rutf8.Utf8StringBuilder() s.append_code(0x1234) assert s.build().decode("utf8") == u"\u1234" @@ -184,6 +185,21 @@ assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 2 + s = rutf8.Utf8StringBuilder() + s.append_utf8("abc", 3, rutf8.FLAG_ASCII) + assert s.get_flag() == rutf8.FLAG_ASCII + assert s.get_length() == 1 + assert s.build().decode("utf8") == u"abc" + + s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR) + assert s.build().decode("utf8") == u"abc\u1234" + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 4 + + s.append_code(0xD800) + assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES + assert s.get_length() == 5 + @given(strategies.text()) def test_utf8_iterator(arg): u = rutf8.Utf8StringIterator(arg.encode('utf8')) From pypy.commits at gmail.com Fri Nov 24 10:13:25 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:13:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: replace a lot of uses of StringBuilder by Utf8StringBuilder Message-ID: <5a183715.3799df0a.a1cf0.9898@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93165:48da1a44d860 Date: 2017-11-24 16:12 +0100 http://bitbucket.org/pypy/pypy/changeset/48da1a44d860/ Log: replace a lot of uses of StringBuilder by Utf8StringBuilder diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -64,6 +64,11 @@ # - malloced object, which means it has index, then # _index_storage.flags determines the kind + @staticmethod + def from_utf8builder(builder): + return W_UnicodeObject( + builder.build(), builder.get_length(), builder.get_flag()) + def __repr__(self): """representation for debugging purposes""" return "%s(%r)" % (self.__class__.__name__, self._utf8) @@ -344,57 +349,38 @@ return 
mod_format(space, w_values, self, do_unicode=True) def descr_swapcase(self, space): - selfvalue = self._utf8 - builder = StringBuilder(len(selfvalue)) - flag = self._get_flag() - i = 0 - while i < len(selfvalue): - ch = rutf8.codepoint_at_pos(selfvalue, i) - i = rutf8.next_codepoint_pos(selfvalue, i) + input = self._utf8 + builder = rutf8.Utf8StringBuilder(len(input)) + for ch in rutf8.Utf8StringIterator(input): if unicodedb.isupper(ch): ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder.append_code(ch) + return self.from_utf8builder(builder) def descr_title(self, space): if len(self._utf8) == 0: return self - utf8, flag = self.title_unicode(self._utf8) - return W_UnicodeObject(utf8, self._len(), flag) + return self.title_unicode(self._utf8) @jit.elidable def title_unicode(self, value): input = self._utf8 - builder = StringBuilder(len(input)) - i = 0 + builder = rutf8.Utf8StringBuilder(len(input)) previous_is_cased = False - flag = self._get_flag() - while i < len(input): - ch = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + for ch in rutf8.Utf8StringIterator(input): if not previous_is_cased: ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) + builder.append_code(ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build(), flag + return self.from_utf8builder(builder) def descr_translate(self, space, w_table): - input = self._utf8 - result = StringBuilder(len(input)) - result_length = 0 - flag = self._get_flag() - i = 0 - while i < len(input): - codepoint = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for codepoint in rutf8.Utf8StringIterator(self._utf8): try: w_newval = space.getitem(w_table, space.newint(codepoint)) except OperationError as e: @@ -406,24 +392,19 @@ elif space.isinstance_w(w_newval, space.w_int): codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): - result.append(w_newval._utf8) - flag = rutf8.combine_flags(flag, w_newval._get_flag()) - result_length += w_newval._length + builder.append_utf8( + w_newval._utf8, w_newval._length, w_newval._get_flag()) continue else: raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") try: - if codepoint >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(result, codepoint, - allow_surrogates=True) - result_length += 1 + builder.append_code(codepoint) except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length, flag) + return self.from_utf8builder(builder) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -534,16 +515,11 @@ return tformat.formatter_field_name_split() def descr_lower(self, space): - builder = StringBuilder(len(self._utf8)) - pos = 0 - flag = self._get_flag() - while pos < len(self._utf8): - lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) - if lower >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - 
rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) - pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + lower = unicodedb.tolower(ch) + builder.append_code(lower) + return self.from_utf8builder(builder) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -711,18 +687,11 @@ return space.newlist(strs_w) def descr_upper(self, space): - value = self._utf8 - builder = StringBuilder(len(value)) - flag = self._get_flag() - i = 0 - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - uchar = unicodedb.toupper(uchar) - if uchar >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + ch = unicodedb.toupper(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int) def descr_zfill(self, space, width): @@ -826,22 +795,15 @@ if len(value) == 0: return self._empty() - flag = self._get_flag() - builder = StringBuilder(len(value)) - uchar = rutf8.codepoint_at_pos(value, 0) - i = rutf8.next_codepoint_pos(value, 0) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + it = rutf8.Utf8StringIterator(self._utf8) + uchar = it.next() ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - i = rutf8.next_codepoint_pos(value, i) - ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder.append_code(ch) + for ch in it: + ch = unicodedb.tolower(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): From pypy.commits at gmail.com Fri Nov 24 10:24:37 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:24:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: small cleanup of copy-pasted join code Message-ID: <5a1839b5.e6361c0a.e0caa.c69f@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93166:f5a5189e5314 Date: 2017-11-24 16:24 +0100 http://bitbucket.org/pypy/pypy/changeset/f5a5189e5314/ Log: small cleanup of copy-pasted join code diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -498,12 +498,6 @@ def _join_return_one(self, space, w_obj): return space.is_w(space.type(w_obj), space.w_unicode) - def _join_check_item(self, space, w_obj): - if (space.isinstance_w(w_obj, space.w_bytes) or - space.isinstance_w(w_obj, space.w_unicode)): - return 0 - return 1 - def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter tformat = unicode_template_formatter(space, space.utf8_w(self)) @@ -633,13 +627,11 @@ flag = self._get_flag() for i in range(size): w_s = list_w[i] - check_item = 
self._join_check_item(space, w_s) - if check_item == 1: + if not (space.isinstance_w(w_s, space.w_bytes) or + space.isinstance_w(w_s, space.w_unicode)): raise oefmt(space.w_TypeError, - "sequence item %d: expected string, %T found", + "sequence item %d: expected string or unicode, %T found", i, w_s) - elif check_item == 2: - return self._join_autoconvert(space, list_w) # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) From pypy.commits at gmail.com Fri Nov 24 12:22:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 09:22:06 -0800 (PST) Subject: [pypy-commit] pypy default: Keep chipping away at readline_w() Message-ID: <5a18553e.759adf0a.6067e.67fc@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93167:2477eb379774 Date: 2017-11-24 17:20 +0000 http://bitbucket.org/pypy/pypy/changeset/2477eb379774/ Log: Keep chipping away at readline_w() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,44 +214,53 @@ def newlines_get_w(self, space): return space.w_None + def _find_newline_universal(self, line, start, end): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + i = start + while i < end: + ch = line[i] + i += 1 + if ch == '\n': + return i + if ch == '\r': + if start + i >= end: + return i + if line[i] == '\n': + return i + 1 + else: + return i + return -1 + + def _find_marker(self, marker, line, start, end): + for i in range(start, end - len(marker) + 1): + ch = line[i] + if ch == marker[0]: + for j in range(1, len(marker)): + if line[i + j] != marker[j]: + break + else: + return i + len(marker) + return -1 + def _find_line_ending(self, line, start, end): - size = end - start if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 + i = self._find_newline_universal(line, start, end) + if i < 0: + return i, end + else: + return i, 0 if self.readtranslate: # Newlines are already translated, only search for \n newline = u'\n' else: # Non-universal mode. 
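# Editor's sketch, not part of the changeset above: the (pos, scanned) pair
# that _find_line_ending hands back to readline_w.  On a hit it returns the
# index just past the newline marker together with 0; on a miss it returns -1
# together with how many characters may safely be consumed now, holding back
# the last len(marker) - 1 characters in case the marker straddles two decoded
# chunks.  The helper below is a simplified stand-in, not the RPython code.
def find_line_ending(marker, line, start, end):
    pos = line.find(marker, start, end)
    if pos >= 0:
        return pos + len(marker), 0
    return -1, max(start, end - len(marker) + 1)

assert find_line_ending(u'\r\n', u'ab\r\ncd', 0, 6) == (4, 0)
assert find_line_ending(u'\r\n', u'abc\r', 0, 4) == (-1, 3)  # keep the '\r'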
newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - + i = self._find_marker(newline, line, start, end) + if i < 0: + return i, end - len(newline) + 1 + else: + return i, 0 W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -654,7 +663,7 @@ limit = convert_size(space, w_limit) line = None - remaining = None + remnant = None builder = UnicodeBuilder() while True: @@ -665,44 +674,43 @@ start = endpos = offset_to_buffer = 0 break - if not remaining: + if not remnant: line = self.decoded_chars start = self.decoded_chars_used offset_to_buffer = 0 else: assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars + line = remnant + self.decoded_chars start = 0 - offset_to_buffer = len(remaining) - remaining = None + offset_to_buffer = len(remnant) + remnant = None line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) + endpos, end_scan = self._find_line_ending(line, start, line_len) chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 break - assert consumed >= 0 + assert end_scan >= 0 - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: + # We can put aside up to `end_scan` + if limit >= 0 and end_scan >= limit - chunked: # Didn't find line ending, but reached length limit endpos = start + limit - chunked assert endpos >= 0 break # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] + if end_scan > start: + s = line[start:end_scan] builder.append(s) - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] + if end_scan < line_len: + remnant = line[end_scan:] line = None # We have consumed the buffer self._unset_decoded() @@ -715,8 +723,8 @@ if start > 0 or endpos < len(line): line = line[start:endpos] builder.append(line) - elif remaining: - builder.append(remaining) + elif remnant: + builder.append(remnant) result = builder.build() return space.newunicode(result) From pypy.commits at gmail.com Fri Nov 24 14:45:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 11:45:56 -0800 (PST) Subject: [pypy-commit] pypy default: More refactoring: deal with the remnant more explicitly and handle size limit inside _find_line_ending() Message-ID: <5a1876f4.0b0f1c0a.ac5e3.0fde@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93168:189c2cce360e Date: 2017-11-24 19:43 +0000 http://bitbucket.org/pypy/pypy/changeset/189c2cce360e/ Log: More refactoring: deal with the remnant more explicitly and handle size limit inside _find_line_ending() diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,27 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', 
'\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -174,18 +174,16 @@ start = self.pos if limit < 0 or limit > len(self.buf) - self.pos: limit = len(self.buf) - self.pos - assert limit >= 0 - end = start + limit endpos, consumed = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, - end + limit ) if endpos < 0: - endpos = end + endpos = start + limit assert endpos >= 0 self.pos = endpos return space.newunicode(u"".join(self.buf[start:endpos])) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,53 +214,49 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, end): + def _find_newline_universal(self, line, start, limit): # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces + limit = min(limit, len(line) - start) + end = start + limit i = start while i < end: ch = line[i] i += 1 if ch == '\n': - return i + return i, 0 if ch == '\r': - if start + i >= end: - return i + if i >= end: + break if line[i] == '\n': - return i + 1 + return i + 1, 0 else: - return i - return -1 + return i, 0 + return -1, end - def _find_marker(self, marker, line, start, end): + def _find_marker(self, marker, line, start, limit): + limit = min(limit, len(line) - start) + end = start + limit for i in range(start, end - len(marker) + 1): ch = line[i] if ch == marker[0]: for j in range(1, len(marker)): if line[i + j] != marker[j]: - break + break # from inner loop else: - return i + len(marker) - return -1 + return i + len(marker), 0 + return -1, end - len(marker) + 1 - def _find_line_ending(self, line, start, end): + def _find_line_ending(self, line, start, limit): if self.readuniversal: - i = self._find_newline_universal(line, start, end) - if i < 0: - return i, end - else: - return i, 0 + return self._find_newline_universal(line, start, limit) if self.readtranslate: # Newlines are already translated, only search for \n newline = u'\n' else: # Non-universal mode. 
newline = self.readnl - i = self._find_marker(newline, line, start, end) - if i < 0: - return i, end - len(newline) + 1 - else: - return i, 0 + return self._find_marker(newline, line, start, limit) W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -671,35 +667,42 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + start = endpos = 0 break - if not remnant: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 - else: + if remnant: + assert not self.readtranslate and self.readnl == u'\r\n' assert self.decoded_chars_used == 0 - line = remnant + self.decoded_chars - start = 0 - offset_to_buffer = len(remnant) - remnant = None + if remnant == u'\r' and self.decoded_chars[0] == u'\n': + builder.append(u'\r\n') + self.decoded_chars_used = 1 + line = remnant = None + start = endpos = 0 + break + else: + builder.append(remnant) + remnant = None + continue + + line = self.decoded_chars + start = self.decoded_chars_used line_len = len(line) - endpos, end_scan = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 + else: + remaining = sys.maxint + endpos, end_scan = self._find_line_ending(line, start, remaining) + if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 break + assert end_scan >= 0 - # We can put aside up to `end_scan` - if limit >= 0 and end_scan >= limit - chunked: + if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + endpos = end_scan break # No line ending seen yet - put aside current data @@ -709,7 +712,7 @@ # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < line_len: + if end_scan < len(line): remnant = line[end_scan:] line = None # We have consumed the buffer @@ -717,9 +720,7 @@ if line: # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used + self.decoded_chars_used = endpos if start > 0 or endpos < len(line): line = line[start:endpos] builder.append(line) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,33 @@ +from hypothesis import given, strategies as st, assume +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break 
+ assert u''.join(lines) == txt From pypy.commits at gmail.com Fri Nov 24 15:20:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 12:20:42 -0800 (PST) Subject: [pypy-commit] pypy default: Replace (pos-if-found, pos-if-not-found) tuple with (position, found) Message-ID: <5a187f1a.4a981c0a.197b.5ae6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93169:9c9233da7cc4 Date: 2017-11-24 20:18 +0000 http://bitbucket.org/pypy/pypy/changeset/9c9233da7cc4/ Log: Replace (pos-if-found, pos-if-not-found) tuple with (position, found) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -176,13 +176,13 @@ limit = len(self.buf) - self.pos assert limit >= 0 - endpos, consumed = self._find_line_ending( + endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, limit ) - if endpos < 0: + if not found: endpos = start + limit assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -224,15 +224,15 @@ ch = line[i] i += 1 if ch == '\n': - return i, 0 + return i, True if ch == '\r': if i >= end: break if line[i] == '\n': - return i + 1, 0 + return i + 1, True else: - return i, 0 - return -1, end + return i, True + return end, False def _find_marker(self, marker, line, start, limit): limit = min(limit, len(line) - start) @@ -244,8 +244,8 @@ if line[i + j] != marker[j]: break # from inner loop else: - return i + len(marker), 0 - return -1, end - len(marker) + 1 + return i + len(marker), True + return end - len(marker) + 1, False def _find_line_ending(self, line, start, limit): if self.readuniversal: @@ -667,7 +667,7 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = 0 + start = end_scan = 0 break if remnant: @@ -677,7 +677,7 @@ builder.append(u'\r\n') self.decoded_chars_used = 1 line = remnant = None - start = endpos = 0 + start = end_scan = 0 break else: builder.append(remnant) @@ -686,23 +686,18 @@ line = self.decoded_chars start = self.decoded_chars_used - - line_len = len(line) if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: remaining = sys.maxint - endpos, end_scan = self._find_line_ending(line, start, remaining) - - if endpos >= 0: + end_scan, found = self._find_line_ending(line, start, remaining) + assert end_scan >= 0 + if found: break - assert end_scan >= 0 - # We can put aside up to `end_scan` if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = end_scan break # No line ending seen yet - put aside current data @@ -720,9 +715,9 @@ if line: # Our line ends in the current buffer - self.decoded_chars_used = endpos - if start > 0 or endpos < len(line): - line = line[start:endpos] + self.decoded_chars_used = end_scan + if start > 0 or end_scan < len(line): + line = line[start:end_scan] builder.append(line) elif remnant: builder.append(remnant) From pypy.commits at gmail.com Fri Nov 24 15:26:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 12:26:03 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a18805b.42e61c0a.73c2e.5f8f@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93170:f9a1926628b2 Date: 2017-11-24 20:22 +0000 
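The hypothesis round-trip tests above exercise exactly the newline modes the refactored readline code has to preserve, and those modes are easy to sanity-check by hand against CPython's io module. A minimal standalone check, standard library only, nothing pypy-specific:

import io

# newline=None: universal newlines, \r and \r\n are translated to \n on read.
f = io.TextIOWrapper(io.BytesIO(b'one\ntwo\r\nthree\rfour'),
                     encoding='utf-8', newline=None)
assert [f.readline() for _ in range(4)] == ['one\n', 'two\n', 'three\n', 'four']

# newline='\r\n': only \r\n terminates a line, and nothing is translated.
f = io.TextIOWrapper(io.BytesIO(b'one\ntwo\r\nthree'),
                     encoding='utf-8', newline='\r\n')
assert f.readline() == 'one\ntwo\r\n'
assert f.readline() == 'three'

# A size argument caps the number of characters returned.
f = io.TextIOWrapper(io.BytesIO(b'abcdef\n'), encoding='utf-8', newline='')
assert f.readline(3) == 'abc'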
http://bitbucket.org/pypy/pypy/changeset/f9a1926628b2/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,27 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -174,18 +174,16 @@ start = self.pos if limit < 0 or limit > len(self.buf) - self.pos: limit = len(self.buf) - self.pos + assert limit >= 0 - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( + endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, - end + limit ) - if endpos < 0: - endpos = end + if not found: + endpos = start + limit assert endpos >= 0 self.pos = endpos return space.newunicode(u"".join(self.buf[start:endpos])) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,44 +221,49 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start + def _find_newline_universal(self, line, start, limit): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + limit = min(limit, len(line) - start) + end = start + limit + i = start + while i < end: + ch = line[i] + i += 1 + if ch == '\n': + return i, True + if ch == '\r': + if i >= end: + break + if line[i] == '\n': + return i + 1, True + else: + return i, True + return end, False + + def _find_marker(self, marker, line, start, limit): + limit = min(limit, len(line) - start) + end = start + limit + for i in range(start, end - len(marker) + 1): + ch = line[i] + if ch == marker[0]: + for j in range(1, len(marker)): + if line[i + j] != marker[j]: + break # from inner loop + else: + return i + len(marker), True + return end - len(marker) + 1, False + + def _find_line_ending(self, line, start, limit): if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. 
- while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 + return self._find_newline_universal(line, start, limit) if self.readtranslate: # Newlines are already translated, only search for \n newline = '\n' else: # Non-universal mode. newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - + return self._find_marker(newline, line, start, limit) W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -661,7 +666,7 @@ limit = convert_size(space, w_limit) line = None - remaining = None + remnant = None builder = StringBuilder() while True: @@ -669,61 +674,60 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + start = end_scan = 0 break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == '\r\n' + assert self.decoded_chars_used == 0 + if remnant == '\r' and self.decoded_chars[0] == '\n': + builder.append('\r\n') + self.decoded_chars_used = 1 + line = remnant = None + start = end_scan = 0 + break + else: + builder.append(remnant) + remnant = None + continue + + line = self.decoded_chars + start = self.decoded_chars_used + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = sys.maxint + end_scan, found = self._find_line_ending(line, start, remaining) + assert end_scan >= 0 + if found: + break - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: + if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 break # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] + if end_scan > start: + s = line[start:end_scan] builder.append(s) - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] + if end_scan < len(line): + remnant = line[end_scan:] line = None # We have consumed the buffer self._unset_decoded() if line: # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] + self.decoded_chars_used = end_scan + if start > 0 or end_scan < len(line): + line = line[start:end_scan] builder.append(line) - elif remaining: - builder.append(remaining) + elif remnant: + builder.append(remnant) result = builder.build() return space.new_from_utf8(result) diff --git 
a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,33 @@ +from hypothesis import given, strategies as st, assume +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -133,11 +134,17 @@ #endif """]) +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + _eci = auto_eci + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=auto_eci, + rffi.INT, compilation_info=_eci, _nowrapper=True) vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=auto_eci, + lltype.Void, compilation_info=_eci, _nowrapper=True) From pypy.commits at gmail.com Fri Nov 24 19:57:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 16:57:47 -0800 (PST) Subject: [pypy-commit] pypy default: Specify the encoding, for systems where utf-8 isn't the default Message-ID: <5a18c00b.cc87df0a.b36f6.6725@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93171:9b3b4676e3b7 Date: 2017-11-25 00:55 +0000 http://bitbucket.org/pypy/pypy/changeset/9b3b4676e3b7/ Log: Specify the encoding, for systems where utf-8 isn't the default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -14,7 +14,8 @@ mode=st.sampled_from(['\r', '\n', '\r\n', '']), limit=st.integers(min_value=-1)) def test_readline(txt, mode, limit): - textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) lines = [] while True: line = textio.readline(limit) From pypy.commits at gmail.com Fri Nov 24 21:31:05 2017 From: pypy.commits at 
gmail.com (rlamy) Date: Fri, 24 Nov 2017 18:31:05 -0800 (PST) Subject: [pypy-commit] pypy default: Extract UnicodeIO object from W_StringIO Message-ID: <5a18d5e9.c9b81c0a.abdfc.5dc5@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93173:1d90f3200c9c Date: 2017-11-25 02:29 +0000 http://bitbucket.org/pypy/pypy/changeset/1d90f3200c9c/ Log: Extract UnicodeIO object from W_StringIO diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,65 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +71,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +94,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +102,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +113,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or 
space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,86 +150,56 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, 
space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) - if self.pos >= len(self.buf): + if self.buf.pos >= len(self.buf.data): return space.newunicode(u"") - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos + start = self.buf.pos + if limit < 0 or limit > len(self.buf.data) - self.buf.pos: + limit = len(self.buf.data) - self.buf.pos assert limit >= 0 endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf), + u"".join(self.buf.data), start, limit ) if not found: endpos = start + limit assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) + self.buf.pos = endpos + return space.newunicode(u"".join(self.buf.data[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -202,32 +215,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) From pypy.commits at gmail.com Fri Nov 24 21:31:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 18:31:03 -0800 (PST) Subject: [pypy-commit] pypy default: Extract DecodeBuffer object from W_TextIOWrapper Message-ID: <5a18d5e7.d31b1c0a.6e981.26d4@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93172:e1dbf4f46c45 Date: 2017-11-25 01:15 +0000 http://bitbucket.org/pypy/pypy/changeset/e1dbf4f46c45/ Log: Extract DecodeBuffer object from W_TextIOWrapper diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -333,6 +333,45 @@ self.input = input +class DecodeBuffer(object): + def __init__(self): + self.text = None + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and self.pos < len(self.text)) + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -346,8 +385,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = 
DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -515,44 +553,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.unicode_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return u"" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -572,7 +576,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -584,7 +588,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -597,10 +601,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -633,7 +637,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.newunicode(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -645,7 +649,7 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) @@ -672,10 +676,10 @@ if remnant: assert not self.readtranslate and self.readnl == u'\r\n' - assert self.decoded_chars_used == 0 - if remnant == u'\r' and self.decoded_chars[0] == u'\n': + assert self.decoded.pos == 0 + if remnant == u'\r' and self.decoded.text[0] == u'\n': builder.append(u'\r\n') - self.decoded_chars_used = 1 + self.decoded.pos = 1 line = remnant = None start = end_scan = 0 break @@ -684,8 +688,8 @@ remnant = None continue - line = self.decoded_chars - start = self.decoded_chars_used + line = self.decoded.text + start = self.decoded.pos if limit > 0: remaining = limit - builder.getlength() assert remaining 
>= 0 @@ -711,11 +715,11 @@ remnant = line[end_scan:] line = None # We have consumed the buffer - self._unset_decoded() + self.decoded.reset() if line: # Our line ends in the current buffer - self.decoded_chars_used = end_scan + self.decoded.pos = end_scan if start > 0 or end_scan < len(line): line = line[start:end_scan] builder.append(line) @@ -855,7 +859,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -880,7 +884,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -901,13 +905,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -933,7 +937,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -944,11 +948,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. From pypy.commits at gmail.com Fri Nov 24 22:49:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 19:49:00 -0800 (PST) Subject: [pypy-commit] pypy default: Add readline() and readline_universal() methods to UnicodeIO, and stop sharing the implementation with textio Message-ID: <5a18e82c.8faedf0a.ec3e7.9c84@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93174:82244130bf34 Date: 2017-11-25 03:46 +0000 http://bitbucket.org/pypy/pypy/changeset/82244130bf34/ Log: Add readline() and readline_universal() methods to UnicodeIO, and stop sharing the implementation with textio diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -32,6 +32,56 @@ self.pos = end return u''.join(self.data[start:end]) + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + def write(self, string): length = len(string) if self.pos + length > len(self.data): @@ -180,26 +230,17 @@ def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.buf.pos >= len(self.buf.data): - return space.newunicode(u"") - - start = self.buf.pos - if limit < 0 or limit > len(self.buf.data) - self.buf.pos: - limit = len(self.buf.data) - self.buf.pos - assert limit >= 0 - - endpos, found = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf.data), - start, - limit - ) - if not found: - endpos = start + limit - assert endpos >= 0 - self.buf.pos = endpos - return space.newunicode(u"".join(self.buf.data[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): From pypy.commits at gmail.com Sat Nov 25 12:55:48 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 25 Nov 2017 09:55:48 -0800 (PST) Subject: [pypy-commit] buildbot default: cleanup, add ensurepip step for TranslatedTests (downloading only) builders (ARM) Message-ID: <5a19aea4.32acdf0a.683e1.26ec@mx.google.com> Author: Matti Picus Branch: Changeset: r1044:a64690c374cf Date: 2017-11-25 19:55 +0200 http://bitbucket.org/pypy/buildbot/changeset/a64690c374cf/ Log: cleanup, add ensurepip step for TranslatedTests (downloading only) builders (ARM) builders that translate call ensurepip as part of translation and in packaging diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -475,6 +475,10 @@ clean = 'rm -rf pypy-venv' target = Property('target_path') factory.addStep(ShellCmd( + description="ensurepip", + command=prefix + [target, '-mensurepip'], + flunkOnFailure=True)) + factory.addStep(ShellCmd( description="clean old virtualenv", command=clean, workdir='venv', @@ -750,12 +754,6 @@ haltOnFailure=True, workdir='.')) self.addStep(ShellCmd( - description="copy ctypes resource cache", - # eventually remove this step, not needed after 5.1 - command=['cp', '-rv', 'pypy-c/lib_pypy/ctypes_config_cache', 'build/lib_pypy'], - haltOnFailure=False, - workdir='.')) - self.addStep(ShellCmd( description="copy cffi import libraries", command='cp -rv pypy-c/lib_pypy/*.so build/lib_pypy', haltOnFailure=True, From pypy.commits at gmail.com Sat Nov 25 20:24:27 2017 From: pypy.commits at gmail.com 
(rlamy) Date: Sat, 25 Nov 2017 17:24:27 -0800 (PST) Subject: [pypy-commit] pypy default: Add some tests for DecodeBuffer Message-ID: <5a1a17cb.c4c21c0a.f510f.f2b6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93176:e8e611955c9a Date: 2017-11-26 01:22 +0000 http://bitbucket.org/pypy/pypy/changeset/e8e611955c9a/ Log: Add some tests for DecodeBuffer diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -290,8 +290,8 @@ class DecodeBuffer(object): - def __init__(self): - self.text = None + def __init__(self, text=None): + self.text = text self.pos = 0 def set(self, space, w_decoded): diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,6 +1,10 @@ -from hypothesis import given, strategies as st, assume +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") from pypy.module._io.interp_bytesio import W_BytesIO -from pypy.module._io.interp_textio import W_TextIOWrapper +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer LINESEP = ['', '\r', '\n', '\r\n'] @@ -31,3 +35,34 @@ else: break assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 20:24:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 17:24:24 -0800 (PST) Subject: [pypy-commit] pypy default: Refactor readline_w() and move most of the logic to DecodeBuffer Message-ID: <5a1a17c8.cb3a1c0a.79405.4011@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93175:65f3ab0d10e3 Date: 2017-11-26 00:54 +0000 http://bitbucket.org/pypy/pypy/changeset/65f3ab0d10e3/ Log: Refactor readline_w() and move most of the logic to DecodeBuffer diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,50 +214,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, limit): - # Universal newline search. 
Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - limit = min(limit, len(line) - start) - end = start + limit - i = start - while i < end: - ch = line[i] - i += 1 - if ch == '\n': - return i, True - if ch == '\r': - if i >= end: - break - if line[i] == '\n': - return i + 1, True - else: - return i, True - return end, False - - def _find_marker(self, marker, line, start, limit): - limit = min(limit, len(line) - start) - end = start + limit - for i in range(start, end - len(marker) + 1): - ch = line[i] - if ch == marker[0]: - for j in range(1, len(marker)): - if line[i + j] != marker[j]: - break # from inner loop - else: - return i + len(marker), True - return end - len(marker) + 1, False - - def _find_line_ending(self, line, start, limit): - if self.readuniversal: - return self._find_newline_universal(line, start, limit) - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = u'\n' - else: - # Non-universal mode. - newline = self.readnl - return self._find_marker(newline, line, start, limit) - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -369,7 +325,88 @@ return chars def has_data(self): - return (self.text is not None and self.pos < len(self.text)) + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False def check_decoded(space, w_decoded): @@ -655,23 +692,36 @@ return space.newunicode(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
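The subtlest part of the DecodeBuffer scanners added above is a '\r' that lands exactly at the end of a decoded chunk: find_crlf() pushes it back (pos -= 1) and reports no match, so the next chunk still has a chance to complete the '\r\n'. A stripped-down standalone model of just that behaviour, with illustrative names and no limit handling, not the module's actual class:

class Scanner(object):
    def __init__(self, text):
        self.text = text
        self.pos = 0

    def next_char(self):
        if self.pos >= len(self.text):
            raise StopIteration
        ch = self.text[self.pos]
        self.pos += 1
        return ch

    def peek_char(self):
        if self.pos >= len(self.text):
            raise StopIteration
        return self.text[self.pos]

    def find_crlf(self):
        # Look for a full '\r\n'; a lone trailing '\r' is pushed back.
        while True:
            try:
                ch = self.next_char()
            except StopIteration:
                return False
            if ch == u'\r':
                try:
                    if self.peek_char() == u'\n':
                        self.next_char()
                        return True
                except StopIteration:
                    self.pos -= 1     # keep the '\r' for the next chunk
                    return False

buf = Scanner(u'abc\r')
assert buf.find_crlf() is False
assert buf.text[buf.pos:] == u'\r'    # the trailing '\r' was not consumed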
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None remnant = None builder = UnicodeBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = end_scan = 0 + if remnant: + builder.append(remnant) break if remnant: @@ -680,52 +730,36 @@ if remnant == u'\r' and self.decoded.text[0] == u'\n': builder.append(u'\r\n') self.decoded.pos = 1 - line = remnant = None - start = end_scan = 0 + remnant = None break else: builder.append(remnant) remnant = None continue - line = self.decoded.text - start = self.decoded.pos if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: - remaining = sys.maxint - end_scan, found = self._find_line_ending(line, start, remaining) - assert end_scan >= 0 - if found: + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) + + if found or (limit >= 0 and builder.getlength() >= limit): break - if limit >= 0 and end_scan - start >= remaining: - # Didn't find line ending, but reached length limit - break - - # No line ending seen yet - put aside current data - if end_scan > start: - s = line[start:end_scan] - builder.append(s) - # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < len(line): - remnant = line[end_scan:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer self.decoded.reset() - if line: - # Our line ends in the current buffer - self.decoded.pos = end_scan - if start > 0 or end_scan < len(line): - line = line[start:end_scan] - builder.append(line) - elif remnant: - builder.append(remnant) - result = builder.build() return space.newunicode(result) From pypy.commits at gmail.com Sat Nov 25 20:29:38 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 17:29:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a1a1902.5a86df0a.3270c.5cd2@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93177:a40f7eee2bcf Date: 2017-11-26 01:27 +0000 http://bitbucket.org/pypy/pypy/changeset/a40f7eee2bcf/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -14,7 +14,8 @@ mode=st.sampled_from(['\r', '\n', '\r\n', '']), limit=st.integers(min_value=-1)) def test_readline(txt, mode, limit): - textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) lines = [] while True: line = textio.readline(limit) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + 
W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): 
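Since the UnicodeIO helper shown in the diff above has no interpreter-level dependencies, its behaviour is easy to poke at directly. Illustrative only: this assumes a pypy source checkout on sys.path; pasting the class straight out of the diff into a plain Python session works just as well.

from pypy.module._io.interp_stringio import UnicodeIO

buf = UnicodeIO()
buf.write(u'one\ntwo\r\nthree')
buf.seek(0)
# readline_universal() stops at any of \r, \r\n, \n and does not translate.
assert buf.readline_universal(-1) == u'one\n'
assert buf.readline_universal(-1) == u'two\r\n'
assert buf.read(-1) == u'three'
assert buf.getvalue() == u'one\ntwo\r\nthree'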
@@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. 
space.call_method(self.w_dict, "update", w_dict) @@ -107,86 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - assert limit >= 0 - - endpos, found = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf), - start, - limit - ) - if not found: - endpos = start + limit - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -202,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. 
if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,50 +221,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, limit): - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - limit = min(limit, len(line) - start) - end = start + limit - i = start - while i < end: - ch = line[i] - i += 1 - if ch == '\n': - return i, True - if ch == '\r': - if i >= end: - break - if line[i] == '\n': - return i + 1, True - else: - return i, True - return end, False - - def _find_marker(self, marker, line, start, limit): - limit = min(limit, len(line) - start) - end = start + limit - for i in range(start, end - len(marker) + 1): - ch = line[i] - if ch == marker[0]: - for j in range(1, len(marker)): - if line[i + j] != marker[j]: - break # from inner loop - else: - return i + len(marker), True - return end - len(marker) + 1, False - - def _find_line_ending(self, line, start, limit): - if self.readuniversal: - return self._find_newline_universal(line, start, limit) - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = '\n' - else: - # Non-universal mode. - newline = self.readnl - return self._find_marker(newline, line, start, limit) - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -340,6 +296,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -353,8 +429,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -522,44 +597,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.utf8_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return "" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -579,7 +620,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -591,7 +632,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -604,10 +645,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -640,7 +681,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -652,82 +693,79 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) return space.new_from_utf8(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None remnant = None builder = StringBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = end_scan = 0 + if remnant: + builder.append(remnant) break if remnant: assert not self.readtranslate and self.readnl == '\r\n' - assert self.decoded_chars_used == 0 - if remnant == '\r' and self.decoded_chars[0] == '\n': + assert self.decoded.pos == 0 + if remnant == '\r' and self.decoded.text[0] == '\n': builder.append('\r\n') - self.decoded_chars_used = 1 - line = remnant = None - start = end_scan = 0 + self.decoded.pos = 1 + remnant = None break else: builder.append(remnant) remnant = None continue - line = self.decoded_chars - start = self.decoded_chars_used if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: - remaining = sys.maxint - end_scan, found = self._find_line_ending(line, start, remaining) - assert end_scan >= 0 - if found: + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) + + if found or (limit >= 0 and builder.getlength() >= limit): break - if limit >= 0 and end_scan - start >= remaining: - # Didn't find line ending, but reached length limit - break - - # No line ending seen yet - put aside current data - if end_scan > start: - s = line[start:end_scan] - builder.append(s) - # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < len(line): - remnant = line[end_scan:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - self.decoded_chars_used = end_scan - if start > 0 or end_scan < len(line): - line = line[start:end_scan] - builder.append(line) - elif remnant: - builder.append(remnant) + self.decoded.reset() result = builder.build() return space.new_from_utf8(result) @@ -861,7 +899,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -886,7 +924,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. 
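For context, the hunks just above and below rework the seek/tell machinery: tell() records a "cookie" made of a safe byte position, the decoder state at that point and a count of characters to skip, and seek() replays it by rewinding the buffer, restoring the decoder, re-decoding and skipping. A rough standalone sketch of that idea, using only the stdlib incremental decoder (illustrative only, not code from this patch):

    import codecs

    # Sketch of the seek/tell cookie idea: remember a byte offset plus the
    # decoder state reached there, then on seek() rewind the byte stream,
    # restore the decoder, re-decode and skip a known number of characters.
    data = u"caf\xe9\nnext line\n".encode("utf-8")
    decoder = codecs.getincrementaldecoder("utf-8")()

    # tell(): record a safe starting byte position and the decoder state.
    start_pos = 0
    saved_state = decoder.getstate()
    chars_to_skip = 4            # characters already consumed past that point

    # seek(): go back to start_pos, restore the decoder, decode, then skip.
    decoder.setstate(saved_state)
    decoded = decoder.decode(data[start_pos:], True)
    assert decoded[chars_to_skip:] == u"\nnext line\n"
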
@@ -907,13 +945,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -939,7 +977,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -950,11 +988,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,6 +1,10 @@ -from hypothesis import given, strategies as st, assume +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") from pypy.module._io.interp_bytesio import W_BytesIO -from pypy.module._io.interp_textio import W_TextIOWrapper +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer LINESEP = ['', '\r', '\n', '\r\n'] @@ -31,3 +35,34 @@ else: break assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 21:40:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:16 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Reapply b89046216269 Message-ID: <5a1a2990.89ce1c0a.19604.fd42@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93178:52a6abae06e4 Date: 2017-11-26 01:58 +0000 http://bitbucket.org/pypy/pypy/changeset/52a6abae06e4/ Log: Reapply b89046216269 diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,6 +1760,10 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + def unicode_w(self, w_obj): + # XXX: kill me! 
+ return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,6 +212,12 @@ def newutf8(self, x, l, f): return w_some_obj() + def new_from_utf8(self, a): + return w_some_obj() + + def newunicode(self, a): + return w_some_obj() + newtext = newbytes newtext_or_none = newbytes newfilename = newbytes diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! + assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Sat Nov 25 21:40:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:19 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Adapt DecodeBuffer to utf8 Message-ID: <5a1a2993.078bdf0a.27561.61d3@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93179:e509ec2ccea2 Date: 2017-11-26 01:51 +0000 http://bitbucket.org/pypy/pypy/changeset/e509ec2ccea2/ Log: Adapt DecodeBuffer to utf8 diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,7 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 +from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8, next_codepoint_pos STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -303,7 +303,7 @@ def set(self, space, w_decoded): check_decoded(space, w_decoded) - self.text = space.unicode_w(w_decoded) + self.text = space.utf8_w(w_decoded) self.pos = 0 def reset(self): @@ -312,7 +312,7 @@ def get_chars(self, size): if self.text is None: - return u"" + return "" available = len(self.text) - self.pos if size < 0 or size > available: @@ -341,7 +341,7 @@ if self.exhausted(): raise StopIteration ch = self.text[self.pos] - self.pos += 1 + self.pos = next_codepoint_pos(self.text, self.pos) return ch def peek_char(self): @@ -362,16 +362,16 @@ ch = self.next_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': return True - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: ch = self.peek_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': self.next_char() return True else: @@ -388,11 +388,11 @@ except StopIteration: return False scanned += 1 - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: - if self.peek_char() == u'\n': + if self.peek_char() == '\n': self.next_char() return True except StopIteration: @@ -705,11 +705,11 @@ else: if self.readtranslate: # Newlines are already translated, only 
search for \n - newline = u'\n' + newline = '\n' else: # Non-universal mode. newline = self.readnl - if newline == u'\r\n': + if newline == '\r\n': return self.decoded.find_crlf(limit) else: return self.decoded.find_char(newline[0], limit) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -38,31 +38,27 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text) - assert buf.get_chars(-1) == text + buf = DecodeBuffer(text.encode('utf-8')) + assert buf.get_chars(-1) == text.encode('utf-8') assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf-8')) strings = [] for n in sizes: s = buf.get_chars(n) if not buf.exhausted(): - assert len(s) == n + assert len(s.decode('utf-8')) == n else: - assert len(s) <= n + assert len(s.decode('utf-8')) <= n strings.append(s) - assert ''.join(strings) == text[:sum(sizes)] + assert ''.join(strings) == text[:sum(sizes)].encode('utf-8') @given(st.text()) def test_next_char(text): - buf = DecodeBuffer(text) - chars = [] - try: - while True: - chars.append(buf.next_char()) - except StopIteration: - pass + buf = DecodeBuffer(text.encode('utf-8')) + for i in range(len(text)): + ch = buf.next_char() + assert ch == text[i].encode('utf-8')[0] assert buf.exhausted() - assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 21:40:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:21 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Fix seek() and tell() Message-ID: <5a1a2995.a1abdf0a.9c29.76f9@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93180:9ff11e92d368 Date: 2017-11-26 02:28 +0000 http://bitbucket.org/pypy/pypy/changeset/9ff11e92d368/ Log: Fix seek() and tell() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8, next_codepoint_pos +from rpython.rlib.rutf8 import ( + FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -420,6 +421,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -945,13 +947,14 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded.pos = cookie.chars_to_skip + self.decoded.set(space, w_decoded) + self.decoded.pos = w_decoded._index_to_byte(cookie.chars_to_skip) else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -963,10 +966,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not 
self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -992,7 +993,8 @@ # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded.pos + chars_to_skip = codepoints_in_utf8( + self.decoded.text, end=self.decoded.pos) # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. @@ -1036,14 +1038,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) From pypy.commits at gmail.com Sat Nov 25 21:40:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:23 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Convert stringio to utf8 Message-ID: <5a1a2997.0eef1c0a.3a2c3.828a@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93181:8a64a04eb505 Date: 2017-11-26 02:37 +0000 http://bitbucket.org/pypy/pypy/changeset/8a64a04eb505/ Log: Convert stringio to utf8 diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -17,20 +17,20 @@ if len(self.data) > newlength: self.data = self.data[:newlength] if len(self.data) < newlength: - self.data.extend([u'\0'] * (newlength - len(self.data))) + self.data.extend(['\0'] * (newlength - len(self.data))) def read(self, size): start = self.pos available = len(self.data) - start if available <= 0: - return u'' + return '' if size >= 0 and size <= available: end = start + size else: end = len(self.data) assert 0 <= start <= end self.pos = end - return u''.join(self.data[start:end]) + return ''.join(self.data[start:end]) def _convert_limit(self, limit): if limit < 0 or limit > len(self.data) - self.pos: @@ -58,7 +58,7 @@ else: break self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def readline(self, marker, limit): @@ -79,7 +79,7 @@ if not found: pos = end self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def write(self, string): @@ -99,7 +99,7 @@ self.resize(size) def getvalue(self): - return u''.join(self.data) + return ''.join(self.data) class W_StringIO(W_TextIOBase): @@ -118,10 +118,10 @@ if space.is_w(w_newline, space.w_None): newline = None else: - newline = space.unicode_w(w_newline) + newline = space.utf8_w(w_newline) - if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + if (newline is not None and newline != "" and newline != "\n" and + newline != "\r" and newline != "\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -131,9 +131,9 @@ ) if newline is not None: self.readnl = newline - self.readuniversal = newline is None or newline == u"" + self.readuniversal = newline is None or newline == "" self.readtranslate = newline is None - if newline and newline[0] == u"\r": + if newline and newline[0] == "\r": 
self.writenl = newline if self.readuniversal: self.w_decoder = space.call_function( @@ -152,7 +152,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -179,7 +179,7 @@ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely - initval = space.unicode_w(w_initval) + initval = space.utf8_w(w_initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, @@ -215,8 +215,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.newunicode(self.writenl)) - string = space.unicode_w(w_decoded) + space.newtext("\n"), space.new_from_utf8(self.writenl)) + string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +225,7 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.newunicode(self.buf.read(size)) + return space.new_from_utf8(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -235,11 +235,11 @@ else: if self.readtranslate: # Newlines are already translated, only search for \n - newline = u'\n' + newline = '\n' else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.newunicode(result) + return space.new_from_utf8(result) @unwrap_spec(pos=int, mode=int) @@ -276,7 +276,7 @@ def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(self.buf.getvalue()) + return space.new_from_utf8(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) From pypy.commits at gmail.com Sat Nov 25 21:42:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:42:51 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: kill test: _io needs the real space Message-ID: <5a1a2a2b.e4a6df0a.72dd5.12bb@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93182:e31d72c624a8 Date: 2017-11-26 02:40 +0000 http://bitbucket.org/pypy/pypy/changeset/e31d72c624a8/ Log: kill test: _io needs the real space diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py deleted file mode 100644 --- a/pypy/module/_io/test/test_ztranslation.py +++ /dev/null @@ -1,4 +0,0 @@ -from pypy.objspace.fake.checkmodule import checkmodule - -def test_checkmodule(): - checkmodule('_io') From pypy.commits at gmail.com Sat Nov 25 22:10:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 19:10:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a1a309b.8b8a1c0a.47f9e.61a7@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93183:8125ba2d1fc1 Date: 2017-11-26 03:02 +0000 http://bitbucket.org/pypy/pypy/changeset/8125ba2d1fc1/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,28 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + 
mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() @unwrap_spec(w_newline = WrappedDefault(u"\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. 
We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,88 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. 
- u"".join(self.buf), - start, - end - ) - if endpos < 0: - endpos = end - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -204,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,45 +214,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start - if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = u'\n' - else: - # Non-universal mode. - newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -343,6 +304,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -356,8 +437,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -546,44 +626,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.unicode_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return u"" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -607,7 +653,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded func_name = "read1" if self.has_read1 else "read" w_input = space.call_method(self.w_buffer, func_name, space.newint(self.chunk_size)) @@ -622,7 +668,7 @@ eof = input_buf.getlength() == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -635,10 +681,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -671,7 +717,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.newunicode(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -683,83 +729,79 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) return space.newunicode(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None - remaining = None + remnant = None builder = UnicodeBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + if remnant: + builder.append(remnant) break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == u'\r\n' + assert self.decoded.pos == 0 + if remnant == u'\r' and self.decoded.text[0] == u'\n': + builder.append(u'\r\n') + self.decoded.pos = 1 + remnant = None + break + else: + builder.append(remnant) + remnant = None + continue + + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: - # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + if found or (limit >= 0 and builder.getlength() >= limit): break - # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] - builder.append(s) - - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] - builder.append(line) - elif remaining: - builder.append(remaining) + self.decoded.reset() result = builder.build() return space.newunicode(result) @@ -903,7 +945,7 @@ self._unsupportedoperation( space, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -933,7 +975,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. 
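As a reference point for the newline handling introduced above (DecodeBuffer.find_newline_universal and _scan_line_ending), the universal-newline rule is: any of \n, \r or \r\n terminates a line, and \r\n is never split because the decoder keeps the pair together. A small illustrative helper, not part of the patch, that applies the same rule to a complete string:

    def split_universal(text):
        # Split `text` into lines, keeping the line endings, treating any of
        # \n, \r or \r\n as a terminator and never splitting a \r\n pair.
        lines, start, i = [], 0, 0
        while i < len(text):
            ch = text[i]
            i += 1
            if ch == u'\n':
                lines.append(text[start:i])
                start = i
            elif ch == u'\r':
                if i < len(text) and text[i] == u'\n':
                    i += 1
                lines.append(text[start:i])
                start = i
        if start < len(text):
            lines.append(text[start:])    # trailing data without a newline
        return lines

    assert split_universal(u"a\rb\r\nc\nd") == [u"a\r", u"b\r\n", u"c\n", u"d"]
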
@@ -954,13 +996,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -987,7 +1029,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -998,11 +1040,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,68 @@ +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sun Nov 26 16:24:20 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 26 Nov 2017 13:24:20 -0800 (PST) Subject: [pypy-commit] pypy default: DOC: how to hack your win32 environment to build cffi modules without 
 setuptools
Message-ID: <5a1b3104.9085df0a.341f4.29e4@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r93184:7b43c9a3c3d2
Date: 2017-11-26 23:23 +0200
http://bitbucket.org/pypy/pypy/changeset/7b43c9a3c3d2/

Log: DOC: how to hack your win32 environment to build cffi modules without setuptools

diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -25,8 +25,10 @@
 This compiler, while the standard one for Python 2.7, is deprecated. Microsoft
 has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_
 (the link
-was checked in Nov 2016). Note that the compiler suite will be installed in
-``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``.
+was checked in Nov 2016). Note that the compiler suite may be installed in
+``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``
+or in
+``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``.
 A current version of ``setuptools`` will be able to find it there. For
 Windows 10, you must right-click the download, and under ``Properties`` ->
 ``Compatibility`` mark it as ``Run this program in compatibility mode for``
@@ -41,7 +43,6 @@
 -----------------------------------

 We routinely test translation using v9, also known as Visual Studio 2008.
-Our buildbot is still using the Express Edition, not the compiler noted above.
 Other configurations may work as well.

 The translation scripts will set up the appropriate environment variables
@@ -81,6 +82,30 @@

 .. _build instructions: http://pypy.org/download.html#building-from-source

+Setting Up Visual Studio for building SSL in Python3
+----------------------------------------------------
+
+On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after
+translation. However ``distutils`` does not support the Microsoft-provided Visual C
+compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The
+traditional solution to this problem is to install the ``setuptools`` module
+via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However
+``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on
+``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which
+depends on ``ssl``.
+
+In order to solve this, the buildbot sets an environment variable that helps
+``distutils`` find the compiler without ``setuptools``::
+
+    set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin
+
+or whatever is appropriate for your machine. Note that this is not enough, you
+must also copy the ``vcvarsall.bat`` file from the ``...\9.0`` directory to the
+``...\9.0\VC`` directory, and edit it, changing the lines that set
+``VCINSTALLDIR`` and ``WindowsSdkDir``::
+    set VCINSTALLDIR=%~dp0\
+    set WindowsSdkDir=%~dp0\..\WinSDK\
+
 Preparing Windows for the large build
 -------------------------------------

From pypy.commits at gmail.com  Mon Nov 27 16:17:18 2017
From: pypy.commits at gmail.com (arigo)
Date: Mon, 27 Nov 2017 13:17:18 -0800 (PST)
Subject: [pypy-commit] pypy unicode-utf8: merge heads
Message-ID: <5a1c80de.5d87df0a.a0b86.e9f5@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r93186:350cb9b5b92b
Date: 2017-11-27 22:16 +0100
http://bitbucket.org/pypy/pypy/changeset/350cb9b5b92b/

Log: merge heads

diff too long, truncating to 2000 out of 2094 lines

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -9,3 +9,5 @@
 * remove assertions from W_UnicodeObject.__init__ if all the builders pass
 * what to do with error handlers that go backwards. There were tests
   in test_codecs that would check for that
+
+* fix _pypyjson to not use a wrapped dict when decoding an object
diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py
new file mode 100644
--- /dev/null
+++ b/extra_tests/test_textio.py
@@ -0,0 +1,28 @@
+from hypothesis import given, strategies as st
+
+from io import BytesIO, TextIOWrapper
+
+LINESEP = ['', '\r', '\n', '\r\n']
+
+ at st.composite
+def text_with_newlines(draw):
+    sep = draw(st.sampled_from(LINESEP))
+    lines = draw(st.lists(st.text(max_size=10), max_size=10))
+    return sep.join(lines)
+
+ at given(txt=text_with_newlines(),
+       mode=st.sampled_from(['\r', '\n', '\r\n', '']),
+       limit=st.integers(min_value=-1))
+def test_readline(txt, mode, limit):
+    textio = TextIOWrapper(
+        BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode)
+    lines = []
+    while True:
+        line = textio.readline(limit)
+        if limit > 0:
+            assert len(line) < limit
+        if line:
+            lines.append(line)
+        else:
+            break
+    assert u''.join(lines) == txt
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1760,10 +1760,6 @@
     def utf8_w(self, w_obj):
         return w_obj.utf8_w(self)

-    def unicode_w(self, w_obj):
-        # XXX: kill me!
- return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. 
We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,88 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. 
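# For reference, the behaviour that W_StringIO re-implements at interp
# level can be observed with CPython's own io.StringIO (behavioural
# illustration only, not PyPy code):
import io
sio = io.StringIO(u'a\r\nb\rc\n', newline=None)    # universal newlines
assert sio.read() == u'a\nb\nc\n'                  # \r and \r\n become \n
sio2 = io.StringIO(u'one\ntwo\n')
assert sio2.readline() == u'one\n'
assert sio2.readline(2) == u'tw'                   # a limit cuts the line short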
- u"".join(self.buf), - start, - end - ) - if endpos < 0: - endpos = end - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -204,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,46 +221,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 - else: - # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size - - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -336,6 +296,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -349,8 +429,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -518,40 +597,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.utf8_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return "" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
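# Minimal plain-Python sketch (illustrative, not the RPython class) of the
# DecodeBuffer cursor protocol used above, showing the tricky find_crlf()
# case: a '\r' sitting at the very end of the decoded text is pushed back,
# so that a '\r\n' pair split across two chunks is never broken in half.
class _DecodeBufferSketch(object):
    def __init__(self, text=u''):
        self.text = text
        self.pos = 0

    def get_chars(self, size):
        available = len(self.text) - self.pos
        if size < 0 or size > available:
            size = available
        chars = self.text[self.pos:self.pos + size]
        self.pos += size
        return chars

    def find_crlf(self):
        # no `limit` handling here, unlike the real method
        while self.pos < len(self.text):
            ch = self.text[self.pos]
            self.pos += 1
            if ch == u'\r':
                if self.pos < len(self.text):
                    if self.text[self.pos] == u'\n':
                        self.pos += 1
                        return True
                else:
                    self.pos -= 1      # push the trailing '\r' back
                    return False
        return False

# buf = _DecodeBufferSketch(u'abc\r'); buf.find_crlf() -> False; buf.pos == 3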
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -571,7 +620,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -583,7 +632,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -595,6 +644,19 @@ return not eof + def _ensure_data(self, space): + while not self.decoded.has_data(): + try: + if not self._read_chunk(space): + self.decoded.reset() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -619,7 +681,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -628,24 +690,29 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: - data = self._get_decoded_chars(remaining) + while remaining > 0: + if not self._ensure_data(space): + break + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break + return space.new_from_utf8(builder.build()) - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - - return space.new_from_utf8(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -653,82 +720,52 @@ self._writeflush(space) limit = convert_size(space, w_limit) - - line = None - remaining = None + remnant = None builder = StringBuilder() - while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None - start = endpos = offset_to_buffer = 0 + if remnant: + builder.append(remnant) break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == '\r\n' + assert self.decoded.pos == 0 + if remnant == '\r' and self.decoded.text[0] == '\n': + builder.append('\r\n') + self.decoded.pos = 1 + remnant = None + break + else: + builder.append(remnant) + remnant = None + continue + + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: - # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + if found or (limit >= 0 and builder.getlength() >= limit): break - # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] - builder.append(s) - - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] - builder.append(line) - elif remaining: - builder.append(remaining) + self.decoded.reset() result = builder.build() return space.new_from_utf8(result) @@ -862,7 +899,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -887,7 +924,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + 
self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -908,13 +945,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -940,7 +977,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -951,11 +988,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,68 @@ +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ 
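# The same property-based pattern as test_readline above, written against
# CPython's stdlib classes instead of the interp-level W_TextIOWrapper
# (illustrative only; needs the `hypothesis` package):
import io
from hypothesis import given, strategies as st, assume

@given(txt=st.text(alphabet=u'ab\r\n', max_size=30),
       limit=st.integers(min_value=-1, max_value=10))
def test_readline_reassembles_input(txt, limit):
    assume(limit != 0)
    f = io.TextIOWrapper(io.BytesIO(txt.encode('utf-8')),
                         encoding='utf-8', newline='')   # no translation
    pieces = []
    while True:
        line = f.readline(limit)
        if limit > 0:
            assert len(line) <= limit
        if not line:
            break
        pieces.append(line)
    assert u''.join(pieces) == txt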
b/pypy/module/_multibytecodec/c_codecs.py @@ -197,19 +197,21 @@ MBENC_FLUSH = 1 MBENC_RESET = 2 -def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None): +def encode(codec, unicodedata, length, errors="strict", errorcb=None, + namecb=None): encodebuf = pypy_cjk_enc_new(codec) if not encodebuf: raise MemoryError try: - return encodeex(encodebuf, unicodedata, errors, errorcb, namecb) + return encodeex(encodebuf, unicodedata, length, errors, errorcb, namecb) finally: pypy_cjk_enc_free(encodebuf) -def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, +def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None, namecb=None, ignore_error=0): - inleft = len(unicodedata) - with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf: + inleft = length + inbuf = rffi.utf82wcharp(utf8data, length) + try: if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0: raise MemoryError if ignore_error == 0: @@ -221,16 +223,18 @@ if r == 0 or r == ignore_error: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) while flags & MBENC_RESET: r = pypy_cjk_enc_reset(encodebuf) if r == 0: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) src = pypy_cjk_enc_outbuf(encodebuf) length = pypy_cjk_enc_outlen(encodebuf) return rffi.charpsize2str(src, length) + finally: + lltype.free(inbuf, flavor='raw') def multibytecodec_encerror(encodebuf, e, errors, errorcb, namecb, unicodedata): @@ -256,21 +260,16 @@ elif errors == "replace": codec = pypy_cjk_enc_getcodec(encodebuf) try: - replace = encode(codec, u"?") + replace = encode(codec, "?", 1) except EncodeDecodeError: replace = "?" else: assert errorcb - XXX - retu, rets, end = errorcb(errors, namecb, reason, - unicodedata.encode("utf8"), start, end) - if rets is not None: - # py3k only - replace = rets - else: - assert retu is not None - codec = pypy_cjk_enc_getcodec(encodebuf) - replace = encode(codec, retu, "strict", errorcb, namecb) + rets, end = errorcb(errors, namecb, reason, + unicodedata, start, end) + codec = pypy_cjk_enc_getcodec(encodebuf) + lgt, _ = rutf8.get_utf8_length_flag(rets) + replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) if r == MBERR_NOMEMORY: diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype +from rpython.rlib import rutf8 from pypy.module._multibytecodec import c_codecs from pypy.module._multibytecodec.interp_multibytecodec import ( MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror, @@ -65,7 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - return space.newunicode(output) + lgt, flag = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt, flag) @unwrap_spec(errors="text_or_none") @@ -88,7 +90,8 @@ def _initialize(self): self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec) - self.pending = u"" + self.pending = "" + self.pending_len = 0 def _free(self): self.pending = None @@ -96,25 +99,37 @@ c_codecs.pypy_cjk_enc_free(self.encodebuf) self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO) - 
@unwrap_spec(object='utf8', final=bool) - def encode_w(self, object, final=False): - u_object = object.decode('utf8') + @unwrap_spec(final=bool) + def encode_w(self, space, w_object, final=False): + utf8data, length = space.utf8_len_w(w_object) space = self.space state = space.fromcache(CodecState) if len(self.pending) > 0: - u_object = self.pending + u_object + utf8data = self.pending + utf8data + length += self.pending_len try: - output = c_codecs.encodeex(self.encodebuf, u_object, self.errors, + output = c_codecs.encodeex(self.encodebuf, utf8data, length, + self.errors, state.encode_error_handler, self.name, get_ignore_error(final)) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, object, len(u_object), + raise wrap_unicodeencodeerror(space, e, utf8data, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf) - assert 0 <= pos <= len(u_object) - self.pending = u_object[pos:] + assert 0 <= pos <= length + # scan the utf8 string until we hit pos + i = 0 + stop = length - pos + self.pending_len = stop + if stop > 0: + while pos > 0: + i = rutf8.next_codepoint_pos(utf8data, i) + pos -= 1 + self.pending = utf8data[i:] + else: + self.pending = "" return space.newbytes(output) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -31,23 +31,23 @@ return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) - @unwrap_spec(input='utf8', errors="text_or_none") - def encode(self, space, input, errors=None): + @unwrap_spec(errors="text_or_none") + def encode(self, space, w_input, errors=None): if errors is None: errors = 'strict' state = space.fromcache(CodecState) + input, length = space.utf8_len_w(w_input) # - u_input = input.decode('utf8') try: - output = c_codecs.encode(self.codec, u_input, errors, + output = c_codecs.encode(self.codec, input, length, errors, state.encode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, input, len(u_input), + raise wrap_unicodeencodeerror(space, e, input, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) return space.newtuple([space.newbytes(output), - space.newint(len(u_input))]) + space.newint(length)]) MultibyteCodec.typedef = TypeDef( diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -14,27 +14,27 @@ def test_decode_gbk(): c = getcodec("gbk") u = decode(c, "\xA1\xAA") - assert u == unichr(0x2014) + assert u == unichr(0x2014).encode('utf8') u = decode(c, "foobar") - assert u == u"foobar" + assert u == "foobar" def test_decode_hz(): # stateful c = getcodec("hz") u = decode(c, "~{abc}") - assert u == u'\u5f95\u6cef' + assert u == u'\u5f95\u6cef'.encode('utf8') u = decode(c, "~{") - assert u == u'' + assert u == '' def test_decodeex_hz(): c = getcodec("hz") decodebuf = c_codecs.pypy_cjk_dec_new(c) u = c_codecs.decodeex(decodebuf, "~{abcd~}") - assert u == u'\u5f95\u6c85' + assert u == u'\u5f95\u6c85'.encode('utf8') u = c_codecs.decodeex(decodebuf, "~{efgh~}") - assert u == u'\u5f50\u73b7' + assert u == u'\u5f50\u73b7'.encode('utf8') u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh") - 
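# Plain-Python sketch of the "pending" bookkeeping done by encode_w()
# above: given a valid UTF-8 byte string and the number of code points the
# codec consumed, find where the unconsumed tail starts.  This stands in
# for rutf8.next_codepoint_pos; the helper name is made up.
def _utf8_tail_after(utf8data, consumed_codepoints):
    i = 0
    while consumed_codepoints > 0:
        first = ord(utf8data[i])
        if first < 0x80:
            i += 1            # 1-byte (ASCII) code point
        elif first < 0xE0:
            i += 2            # 2-byte sequence
        elif first < 0xF0:
            i += 3            # 3-byte sequence
        else:
            i += 4            # 4-byte sequence
        consumed_codepoints -= 1
    return utf8data[i:]

# _utf8_tail_after(u'a\u00e9b'.encode('utf8'), 2) == 'b'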
assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7' + assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'.encode('utf8') c_codecs.pypy_cjk_dec_free(decodebuf) def test_decodeex_hz_incomplete(): @@ -64,7 +64,7 @@ buf += c u = c_codecs.decodeex(decodebuf, buf, ignore_error = c_codecs.MBERR_TOOFEW) - assert u == output + assert u == output.encode('utf8') incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf) buf = buf[incompletepos:] assert buf == '' @@ -86,46 +86,47 @@ def test_decode_hz_ignore(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'ignore') - assert u == u'def\u5fcf' + assert u == u'def\u5fcf'.encode('utf8') def test_decode_hz_replace(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'replace') - assert u == u'def\ufffd\u5fcf' + assert u == u'def\ufffd\u5fcf'.encode('utf8') def test_encode_hz(): c = getcodec("hz") - s = encode(c, u'foobar') + s = encode(c, u'foobar'.encode('utf8'), 6) assert s == 'foobar' and type(s) is str - s = encode(c, u'\u5f95\u6cef') + s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) assert s == '~{abc}~}' def test_encode_hz_error(): # error c = getcodec("hz") - e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value + e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def'.encode('utf8'), 7).value assert e.start == 3 assert e.end == 4 assert e.reason == "illegal multibyte sequence" def test_encode_hz_ignore(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'ignore') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'ignore') assert s == 'abcdef' def test_encode_hz_replace(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'replace') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'replace') assert s == 'abc?def' def test_encode_jisx0208(): c = getcodec('iso2022_jp') - s = encode(c, u'\u83ca\u5730\u6642\u592b') + s = encode(c, u'\u83ca\u5730\u6642\u592b'.encode('utf8'), 4) assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str def test_encode_custom_error_handler_bytes(): + py.test.skip("needs revamping in py3k") c = getcodec("hz") def errorhandler(errors, enc, msg, t, startingpos, endingpos): - return None, '\xc3', endingpos - s = encode(c, u'abc\u1234def', 'foo', errorhandler) + return u'\xc3'.encode('utf8'), endingpos + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler) assert '\xc3' in s diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -1,6 +1,7 @@ from pypy.module._multibytecodec import c_codecs from rpython.translator.c.test import test_standalone from rpython.config.translationoption import get_combined_translation_config +from rpython.rlib import rutf8 class TestTranslation(test_standalone.StandaloneTests): @@ -13,7 +14,8 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - r = c_codecs.encode(c, u) + lgt, _ = rutf8.get_utf8_length_flag(u) + r = c_codecs.encode(c, u, lgt) print r return 0 # diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -1,7 +1,7 @@ import sys from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize, always_inline, r_dict -from rpython.rlib import rfloat, runicode +from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi 
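# Behavioural reference for the HZ tests above: CPython ships the same
# CJKCodecs-based 'hz' codec, so the byte strings asserted by
# test_encode_hz/test_decode_hz should be reproducible directly
# (illustration only; the expected values are taken from the tests above).
assert u'\u5f95\u6cef'.encode('hz') == '~{abc}~}'
assert '~{abc}'.decode('hz') == u'\u5f95\u6cef'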
from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -19,29 +19,6 @@ return 0.0 return x * NEG_POW_10[exp] -def strslice2unicode_latin1(s, start, end): - """ - Convert s[start:end] to unicode. s is supposed to be an RPython string - encoded in latin-1, which means that the numeric value of each char is the - same as the corresponding unicode code point. - - Internally it's implemented at the level of low-level helpers, to avoid - the extra copy we would need if we take the actual slice first. - - No bound checking is done, use carefully. - """ - from rpython.rtyper.annlowlevel import llstr, hlunicode - from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE - from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar - length = end-start - ll_s = llstr(s) - ll_res = malloc(UNICODE, length) - ll_res.hash = 0 - for i in range(length): - ch = ll_s.chars[start+i] - ll_res.chars[i] = cast_primitive(UniChar, ch) - return hlunicode(ll_res) - def slice_eq(a, b): (ll_chars1, start1, length1, _) = a (ll_chars2, start2, length2, _) = b @@ -270,10 +247,11 @@ self.pos = i+1 return self.space.newdict() - d = {} + # XXX this should be improved to use an unwrapped dict + w_dict = self.space.newdict() while True: # parse a key: value - name = self.decode_key(i) + w_name = self.decode_key(i) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] if ch != ':': @@ -282,13 +260,13 @@ i = self.skip_whitespace(i) # w_value = self.decode_any(i) - d[name] = w_value + self.space.setitem(w_dict, w_name, w_value) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] i += 1 if ch == '}': self.pos = i - return self._create_dict(d) + return w_dict elif ch == ',': pass elif ch == '\0': @@ -297,10 +275,6 @@ self._raise("Unexpected '%s' when decoding object (char %d)", ch, i-1) - def _create_dict(self, d): - from pypy.objspace.std.dictmultiobject import from_unicode_key_dict - return from_unicode_key_dict(self.space, d) - def decode_string(self, i): start = i bits = 0 @@ -312,8 +286,7 @@ bits |= ord(ch) if ch == '"': self.pos = i - return self.space.newunicode( - self._create_string(start, i - 1, bits)) + return self._create_string(start, i - 1, bits) elif ch == '\\' or ch < '\x20': self.pos = i-1 return self.decode_string_escaped(start) @@ -322,12 +295,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - return unicodehelper.decode_utf8(self.space, content_utf8) + lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) + return self.space.newutf8(content_utf8, lgt, flag) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) - return strslice2unicode_latin1(self.s, start, end) + return self.space.newutf8(self.getslice(start, end), + end - start, rutf8.FLAG_ASCII) def decode_string_escaped(self, start): i = self.pos @@ -340,9 +316,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + lgt, f = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) self.pos = i - return self.space.newunicode(content_unicode) + return self.space.newutf8(content_utf8, lgt, f) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': @@ -389,8 +366,7 @@ return # help the annotator to know that we'll never go beyond # this point # - uchr = runicode.code_to_unichr(val) # may be a surrogate pair again - utf8_ch = unicodehelper.encode_utf8(self.space, 
uchr) + utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) builder.append(utf8_ch) return i @@ -404,7 +380,7 @@ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00)) def decode_key(self, i): - """ returns an unwrapped unicode """ + """ returns a wrapped unicode """ from rpython.rlib.rarithmetic import intmask i = self.skip_whitespace(i) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,5 +1,5 @@ from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import str_decode_utf_8 +from rpython.rlib import rutf8 from pypy.interpreter import unicodehelper @@ -30,11 +30,8 @@ # the input is a string with only non-special ascii chars return w_string - eh = unicodehelper.decode_error_handler(space) - u = str_decode_utf_8( - s, len(s), None, final=True, errorhandler=eh, - allow_surrogates=True)[0] - sb = StringBuilder(len(u)) + unicodehelper.check_utf8_or_raise(space, s) + sb = StringBuilder(len(s)) sb.append_slice(s, 0, first) else: # We used to check if 'u' contains only safe characters, and return @@ -44,29 +41,31 @@ # a string (with the ascii encoding). This requires two passes # over the characters. So we may as well directly turn it into a # string here --- only one pass. - u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) + s = space.utf8_w(w_string) + sb = StringBuilder(len(s)) first = 0 - for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + it = rutf8.Utf8StringIterator(s) + for i in range(first): + it.next() + for c in it: + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -10,10 +10,14 @@ assert dec.skip_whitespace(8) == len(s) dec.close() +class FakeSpace(object): + def newutf8(self, s, l, f): + return s + def test_decode_key(): s1 = "123" * 100 s = ' "%s" "%s" ' % (s1, s1) - dec = JSONDecoder('fake space', s) + dec = JSONDecoder(FakeSpace(), s) assert dec.pos == 0 x = dec.decode_key(0) assert x == s1 diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -167,8 +167,8 @@ addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): - buf = rffi.unicode2wcharp(unicodeval) + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): + buf = rffi.utf82wcharp(utf8val, utf8len) self.w_func.to_free.append(rffi.cast(rffi.VOIDP, buf)) addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) diff --git 
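# Standalone illustration (plain Python) of the surrogate-pair arithmetic
# used above: decode_escape_sequence() combines two \uXXXX escapes into a
# single code point, and the encoder splits a non-BMP code point back into
# a \uXXXX\uXXXX pair.
def _combine_surrogates(high, low):
    return 0x10000 + (((high - 0xD800) << 10) | (low - 0xDC00))

def _split_surrogates(code):
    n = code - 0x10000
    return 0xD800 | ((n >> 10) & 0x3FF), 0xDC00 | (n & 0x3FF)

assert _combine_surrogates(0xD83D, 0xDE00) == 0x1F600   # U+1F600
assert _split_surrogates(0x1F600) == (0xD83D, 0xDE00)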
a/pypy/module/_rawffi/alt/test/test_type_converter.py b/pypy/module/_rawffi/alt/test/test_type_converter.py --- a/pypy/module/_rawffi/alt/test/test_type_converter.py +++ b/pypy/module/_rawffi/alt/test/test_type_converter.py @@ -6,7 +6,7 @@ class DummyFromAppLevelConverter(FromAppLevelConverter): - def handle_all(self, w_ffitype, w_obj, val): + def handle_all(self, w_ffitype, w_obj, val, lgt=None): self.lastval = val handle_signed = handle_all @@ -120,8 +120,8 @@ def test_strings(self): # first, try automatic conversion from applevel self.check(app_types.char_p, self.space.newbytes('foo'), 'foo') - self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234') - self.check(app_types.unichar_p, self.space.wrap('foo'), u'foo') + self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234'.encode('utf8')) + self.check(app_types.unichar_p, self.space.wrap('foo'), 'foo') # then, try to pass explicit pointers self.check(app_types.char_p, self.space.wrap(42), 42) self.check(app_types.unichar_p, self.space.wrap(42), 42) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -1,6 +1,6 @@ from rpython.rlib import libffi -from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import jit, rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.error import oefmt from pypy.module._rawffi.structure import W_StructureInstance, W_Structure from pypy.module._rawffi.alt.interp_ffitype import app_types @@ -85,8 +85,8 @@ return True elif w_ffitype.is_unichar_p() and (w_type is self.space.w_bytes or w_type is self.space.w_unicode): - unicodeval = self.space.unicode_w(w_obj) - self.handle_unichar_p(w_ffitype, w_obj, unicodeval) + utf8, lgt = self.space.utf8_len_w(w_obj) + self.handle_unichar_p(w_ffitype, w_obj, utf8, lgt) return True return False @@ -147,7 +147,7 @@ """ self.error(w_ffitype, w_obj) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): """ unicodeval: interp-level unicode """ @@ -228,7 +228,8 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newunicode(unichr(wcharval)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, + rutf8.get_flag_from_code(intmask(wcharval))) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -10,6 +10,7 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rffi_platform from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize import rpython.rlib.rposix as rposix @@ -416,13 +417,13 @@ val = s[0] push_func(add_arg, argdesc, val) elif letter == 'u': - s = space.unicode_w(w_arg) - if len(s) != 1: + s, lgt = space.utf8_len_w(w_arg) + if lgt != 1: raise oefmt(space.w_TypeError, "Expected unicode string of length one as wide " "character") - val = s[0] - push_func(add_arg, argdesc, val) + val = rutf8.codepoint_at_pos(s, 0) + push_func(add_arg, argdesc, rffi.cast(rffi.WCHAR_T, val)) else: for c in unroll_letters_for_numbers: if letter == c: diff --git 
a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -7,7 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # @@ -237,8 +238,8 @@ filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.unicode_w(w_ptemplate) - literal = u'\\' not in filter_as_unicode + filter_as_unicode = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: @@ -267,7 +268,7 @@ sublist_w = strbuilder = unicodebuilder = None if use_builder: if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) + unicodebuilder = Utf8StringBuilder(ctx.end) else: assert filter_as_string is not None strbuilder = StringBuilder(ctx.end) @@ -335,7 +336,9 @@ return space.newbytes(strbuilder.build()), n else: assert unicodebuilder is not None - return space.newunicode(unicodebuilder.build()), n + return space.newutf8(unicodebuilder.build(), + unicodebuilder.get_length(), + unicodebuilder.get_flag()), n else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newunicode(u'') diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -1566,12 +1566,13 @@ cadata = space.bufferstr_w(w_cadata) else: ca_file_type = SSL_FILETYPE_PEM - try: - cadata = space.unicode_w(w_cadata).encode('ascii') - except UnicodeEncodeError: + w_uni = space.convert_arg_to_w_unicode(w_cadata) + if not w_uni.is_ascii(): raise oefmt(space.w_TypeError, "cadata should be a ASCII string or a " "bytes-like object") + cadata = space.utf8_w(w_uni) + if cafile is None and capath is None and cadata is None: raise oefmt(space.w_TypeError, "cafile and capath cannot be both omitted") diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1257,12 +1257,6 @@ create_iterator_classes(UnicodeDictStrategy) -def from_unicode_key_dict(space, d): - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.erase(d) - return W_DictObject(space, strategy, storage) - - class IntDictStrategy(AbstractTypedStrategy, DictStrategy): erase, unerase = rerased.new_erasing_pair("int") erase = staticmethod(erase) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,23 +367,10 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) - def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
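# The cadata check above, restated as a tiny plain-Python sketch: a
# unicode cadata argument must be pure ASCII, otherwise a TypeError is
# raised (illustrative only; the real code now asks the wrapped unicode
# object via is_ascii() instead of round-tripping through encode()).
def _check_cadata_text(cadata_unicode):
    try:
        return cadata_unicode.encode('ascii')
    except UnicodeEncodeError:
        raise TypeError("cadata should be a ASCII string or a "
                        "bytes-like object")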
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -64,6 +64,11 @@ # - malloced object, which means it has index, then # _index_storage.flags determines the kind + @staticmethod + def from_utf8builder(builder): + return W_UnicodeObject( + builder.build(), builder.get_length(), builder.get_flag()) + def __repr__(self): """representation for debugging purposes""" return "%s(%r)" % (self.__class__.__name__, self._utf8) @@ -344,57 +349,38 @@ return mod_format(space, w_values, self, do_unicode=True) def descr_swapcase(self, space): - selfvalue = self._utf8 - builder = StringBuilder(len(selfvalue)) - flag = self._get_flag() - i = 0 - while i < len(selfvalue): - ch = rutf8.codepoint_at_pos(selfvalue, i) - i = rutf8.next_codepoint_pos(selfvalue, i) + input = self._utf8 + builder = rutf8.Utf8StringBuilder(len(input)) + for ch in rutf8.Utf8StringIterator(input): if unicodedb.isupper(ch): ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder.append_code(ch) + return self.from_utf8builder(builder) def descr_title(self, space): if len(self._utf8) == 0: return self - utf8, flag = self.title_unicode(self._utf8) - return W_UnicodeObject(utf8, self._len(), flag) + return self.title_unicode(self._utf8) @jit.elidable def title_unicode(self, value): input = self._utf8 - builder = StringBuilder(len(input)) - i = 0 + builder = rutf8.Utf8StringBuilder(len(input)) previous_is_cased = False - flag = self._get_flag() - while i < len(input): - ch = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + for ch in rutf8.Utf8StringIterator(input): if not previous_is_cased: ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) + builder.append_code(ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build(), flag + return self.from_utf8builder(builder) def descr_translate(self, space, w_table): - input = self._utf8 - result = StringBuilder(len(input)) - result_length = 0 - flag = self._get_flag() - i = 0 - while i < len(input): - codepoint = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for codepoint in rutf8.Utf8StringIterator(self._utf8): try: w_newval = space.getitem(w_table, space.newint(codepoint)) except OperationError as e: @@ -406,24 +392,19 @@ elif space.isinstance_w(w_newval, space.w_int): codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): - result.append(w_newval._utf8) - flag = rutf8.combine_flags(flag, w_newval._get_flag()) - result_length += w_newval._length + builder.append_utf8( + w_newval._utf8, w_newval._length, w_newval._get_flag()) continue else: raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") try: - if codepoint >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - 
rutf8.unichr_as_utf8_append(result, codepoint, - allow_surrogates=True) - result_length += 1 + builder.append_code(codepoint) except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length, flag) + return self.from_utf8builder(builder) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -517,12 +498,6 @@ def _join_return_one(self, space, w_obj): return space.is_w(space.type(w_obj), space.w_unicode) - def _join_check_item(self, space, w_obj): - if (space.isinstance_w(w_obj, space.w_bytes) or - space.isinstance_w(w_obj, space.w_unicode)): - return 0 - return 1 - def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter tformat = unicode_template_formatter(space, space.utf8_w(self)) @@ -534,16 +509,11 @@ return tformat.formatter_field_name_split() def descr_lower(self, space): - builder = StringBuilder(len(self._utf8)) - pos = 0 - flag = self._get_flag() - while pos < len(self._utf8): - lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) - if lower >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) - pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + lower = unicodedb.tolower(ch) + builder.append_code(lower) + return self.from_utf8builder(builder) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -657,13 +627,11 @@ flag = self._get_flag() for i in range(size): w_s = list_w[i] - check_item = self._join_check_item(space, w_s) - if check_item == 1: + if not (space.isinstance_w(w_s, space.w_bytes) or + space.isinstance_w(w_s, space.w_unicode)): raise oefmt(space.w_TypeError, - "sequence item %d: expected string, %T found", + "sequence item %d: expected string or unicode, %T found", i, w_s) - elif check_item == 2: - return self._join_autoconvert(space, list_w) # XXX Maybe the extra copy here is okay? 
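# Plain-Python (2.x) analogue of the Utf8StringIterator/Utf8StringBuilder
# pattern that descr_swapcase/descr_lower/descr_upper use above: walk the
# code points of a UTF-8 byte string and rebuild a new UTF-8 byte string.
# Illustrative only -- the real code uses unicodedb and never materialises
# a unicode object.
def _utf8_swapcase(utf8_bytes):
    chars = []
    for uchar in utf8_bytes.decode('utf-8'):
        if uchar.isupper():
            uchar = uchar.lower()
        elif uchar.islower():
            uchar = uchar.upper()
        chars.append(uchar)
    return u''.join(chars).encode('utf-8')

# _utf8_swapcase(u'aBc\u00e9'.encode('utf-8')) == u'AbC\u00c9'.encode('utf-8')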
It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) @@ -711,18 +679,11 @@ return space.newlist(strs_w) def descr_upper(self, space): - value = self._utf8 - builder = StringBuilder(len(value)) - flag = self._get_flag() - i = 0 - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - uchar = unicodedb.toupper(uchar) - if uchar >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + ch = unicodedb.toupper(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int) def descr_zfill(self, space, width): @@ -826,22 +787,15 @@ if len(value) == 0: return self._empty() - flag = self._get_flag() - builder = StringBuilder(len(value)) - uchar = rutf8.codepoint_at_pos(value, 0) - i = rutf8.next_codepoint_pos(value, 0) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + it = rutf8.Utf8StringIterator(self._utf8) + uchar = it.next() ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - i = rutf8.next_codepoint_pos(value, i) - ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder.append_code(ch) + for ch in it: + ch = unicodedb.tolower(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -687,6 +687,11 @@ self._lgt += 1 unichr_as_utf8_append(self._s, code, True) + def append_utf8(self, utf8, length, flag): + self._flag = combine_flags(self._flag, flag) + self._lgt += length + self._s.append(utf8) + def build(self): return self._s.build() @@ -702,10 +707,12 @@ self._end = len(utf8s) self._pos = 0 - def done(self): - return self._pos == self._end + def __iter__(self): + return self def next(self): From pypy.commits at gmail.com Mon Nov 27 16:17:15 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 27 Nov 2017 13:17:15 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: On wide hosts, though, we should continue to run this test about lone sorrogates Message-ID: <5a1c80db.8b421c0a.5110b.e595@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93185:3b96420db19a Date: 2017-11-23 17:50 +0100 http://bitbucket.org/pypy/pypy/changeset/3b96420db19a/ Log: On wide hosts, though, we should continue to run this test about lone sorrogates diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- 
a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -155,7 +155,7 @@
             exp_flag = rutf8.FLAG_HAS_SURROGATES
             break
     lgt, flag = rutf8.get_utf8_length_flag(''.join([c.encode('utf8') for c in u]))
-    if exp_flag != rutf8.FLAG_HAS_SURROGATES:
+    if exp_flag != rutf8.FLAG_HAS_SURROGATES or sys.maxunicode > 0xffff:
         assert lgt == exp_lgt
         assert flag == exp_flag

From pypy.commits at gmail.com  Mon Nov 27 17:11:14 2017
From: pypy.commits at gmail.com (arigo)
Date: Mon, 27 Nov 2017 14:11:14 -0800 (PST)
Subject: [pypy-commit] pypy unicode-utf8: Fix test (?)
Message-ID: <5a1c8d82.480f1c0a.bb6fc.6702@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r93188:a5ddce968cd5
Date: 2017-11-27 23:10 +0100
http://bitbucket.org/pypy/pypy/changeset/a5ddce968cd5/

Log: Fix test (?)

diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -199,7 +199,7 @@
     s = rutf8.Utf8StringBuilder()
     s.append_utf8("abc", 3, rutf8.FLAG_ASCII)
     assert s.get_flag() == rutf8.FLAG_ASCII
-    assert s.get_length() == 1
+    assert s.get_length() == 3
     assert s.build().decode("utf8") == u"abc"
     s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR)
+ """ + index_min = 0 + index_max = len(storage.contents) - 1 + while index_min < index_max: + index_middle = (index_min + index_max + 1) // 2 + base_bytepos = storage.contents[index_middle].baseindex + if bytepos < base_bytepos: + index_max = index_middle - 1 + else: + index_min = index_middle + bytepos1 = storage.contents[index_min].baseindex + result = index_min << 6 + while bytepos1 < bytepos: + bytepos1 = next_codepoint_pos(utf8, bytepos1) + result += 1 + return result + + def make_utf8_escape_function(pass_printable=False, quotes=False, prefix=None): @jit.elidable def unicode_escape(s): diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -128,6 +128,17 @@ assert (rutf8.codepoint_position_at_index(u.encode('utf8'), index, i) == len(u[:i].encode('utf8'))) + at given(strategies.text(average_size=140)) + at example(u'x' * 64 * 5) + at example(u'x' * (64 * 5 - 1)) +def test_codepoint_index_at_byte_position(u): + storage = rutf8.create_utf8_index_storage(u.encode('utf8'), len(u)) + for i in range(len(u) + 1): + bytepos = len(u[:i].encode('utf8')) + assert rutf8.codepoint_index_at_byte_position( + u.encode('utf8'), storage, bytepos) == i + + repr_func = rutf8.make_utf8_escape_function(prefix='u', pass_printable=False, quotes=True) From pypy.commits at gmail.com Tue Nov 28 08:35:26 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 28 Nov 2017 05:35:26 -0800 (PST) Subject: [pypy-commit] pypy default: cleanup, enable VSXXXCOMNTOOLS in win32 platform Message-ID: <5a1d661e.94571c0a.8e5e1.0bde@mx.google.com> Author: Matti Picus Branch: Changeset: r93189:1351a1844107 Date: 2017-11-28 15:33 +0200 http://bitbucket.org/pypy/pypy/changeset/1351a1844107/ Log: cleanup, enable VSXXXCOMNTOOLS in win32 platform diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, '--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -60,6 +52,10 @@ vcvars = None try: toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] + if x64flag: + vcvars = os.path.join(toolsdir, "vcvarsamd64.bat") + else: + vcvars = os.path.join(toolsdir, 'vsvars32.bat') except KeyError: # try to import from the registry, as done in setuptools # XXX works for 90 but is it generalizable? 
From pypy.commits at gmail.com Tue Nov 28 10:10:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:40 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: a branch in which to fix again the interaction of vmprof and stacklets; in particular, after fix-vmprof-stacklet-switch we no longer segfault, but we stop sampling after a switch Message-ID: <5a1d7c70.c97e1c0a.c2665.d3ec@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93190:b8f121ce766d Date: 2017-11-28 11:50 +0100 http://bitbucket.org/pypy/pypy/changeset/b8f121ce766d/ Log: a branch in which to fix again the interaction of vmprof and stacklets; in particular, after fix-vmprof-stacklet-switch we no longer segfault, but we stop sampling after a switch From pypy.commits at gmail.com Tue Nov 28 10:10:43 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:43 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: move {start, stop}_sampling inside the VMProf API, and start to write a fake class to test the correct usage of them Message-ID: <5a1d7c73.130d1c0a.3131c.833b@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93191:2f204b1c432c Date: 2017-11-28 15:38 +0100 http://bitbucket.org/pypy/pypy/changeset/2f204b1c432c/ Log: move {start,stop}_sampling inside the VMProf API, and start to write a fake class to test the correct usage of them diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,7 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -168,6 +168,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. 
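The two methods added above are meant to be called in strictly matched pairs: sampling is paused while the vmprof stack is in an inconsistent state (for example across a stacklet switch) and resumed afterwards. A minimal sketch of that contract, using only the module-level entry points shown above (the wrapper function itself is made up for illustration):

    # Illustration only: every stop_sampling() must be balanced by a
    # start_sampling(), even if the protected operation raises.
    from rpython.rlib import rvmprof

    def run_with_sampling_paused(func, *args):
        rvmprof.stop_sampling()
        try:
            return func(*args)
        finally:
            rvmprof.start_sampling()

To let tests verify that callers respect this pairing, the changeset also adds a fake implementation: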
diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,26 @@ + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakervmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,23 @@ +import pytest +from rpython.rlib.rvmprof.test.support import FakeVMProf + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + From pypy.commits at gmail.com Tue Nov 28 10:10:45 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:45 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: WIP: introduce a pytest fixture which allow us to easily use a global FakeVMProf instead of the real one Message-ID: <5a1d7c75.923e1c0a.75bd2.eef1@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93192:d5d42f493530 Date: 2017-11-28 15:47 +0100 http://bitbucket.org/pypy/pypy/changeset/d5d42f493530/ Log: WIP: introduce a pytest fixture which allow us to easily use a global FakeVMProf instead of the real one diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -1,3 +1,5 @@ +import pytest +from rpython.rlib import rvmprof class FakeVMProf(object): @@ -24,3 +26,12 @@ def is_sampling_enabled(self): return self._ignore_signals == 0 + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + def _get_fake_vmprof(): + return fake + monkeypatch.setattr(rvmprof.rvmprof, '_get_vmprof', _get_fake_vmprof) + return fake + diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -1,5 +1,6 @@ import pytest -from rpython.rlib.rvmprof.test.support import FakeVMProf +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof class TestFakeVMProf(object): @@ -21,3 +22,17 @@ # pytest.raises(AssertionError, "fake.start_sampling()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof.rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert 
fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Tue Nov 28 10:10:47 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:47 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: 1) we can't monkey-patch _get_vmprof because it's imported in two places; instead it's easier to monkey-patch the singleton it returns; 2) move vmprof_{start,stop}_sampling to the proper cintf namespace, so that they can no longer be called directly Message-ID: <5a1d7c77.e28edf0a.a85e7.d837@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93193:b85210ca9c20 Date: 2017-11-28 15:54 +0100 http://bitbucket.org/pypy/pypy/changeset/b85210ca9c20/ Log: 1) we can't monkey-patch _get_vmprof because it's imported in two places; instead it's easier to monkey-patch the singleton it returns; 2) move vmprof_{start,stop}_sampling to the proper cintf namespace, so that they can no longer be called directly diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -122,32 +122,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -30,8 +30,6 @@ @pytest.fixture def fakevmprof(request, monkeypatch): fake = FakeVMProf() - def _get_fake_vmprof(): - return fake - monkeypatch.setattr(rvmprof.rvmprof, '_get_vmprof', _get_fake_vmprof) + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -28,7 +28,7 @@ def test_fixture(self, fakevmprof): assert isinstance(fakevmprof, FakeVMProf) - assert rvmprof.rvmprof._get_vmprof() is fakevmprof + assert rvmprof._get_vmprof() is fakevmprof # # tweak sampling using the "real" API, and check that we actually used # the fake From pypy.commits at gmail.com Tue Nov 28 10:10:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:49 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: add a pytest finalizer to check that we called {start, stop}_sampling an even amount of times. 
I have no idea how to write a test for it, though :( Message-ID: <5a1d7c79.8a5b1c0a.4a864.12f5@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93194:ff5fd2e1f430 Date: 2017-11-28 16:09 +0100 http://bitbucket.org/pypy/pypy/changeset/ff5fd2e1f430/ Log: add a pytest finalizer to check that we called {start,stop}_sampling an even amount of times. I have no idea how to write a test for it, though :( diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -31,5 +31,12 @@ def fakevmprof(request, monkeypatch): fake = FakeVMProf() monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + # + def check_status(): + if fake._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % fake._ignore_signals + request.addfinalizer(check_status) return fake - From pypy.commits at gmail.com Tue Nov 28 11:50:08 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:08 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: move the vmprof API needed by rstacklet from cintf to rvmprof/__init__.py, to integrate better with fakevmprof Message-ID: <5a1d93c0.05d31c0a.e821a.b73b@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93195:2907f533041c Date: 2017-11-28 16:48 +0100 http://bitbucket.org/pypy/pypy/changeset/2907f533041c/ Log: move the vmprof API needed by rstacklet from cintf to rvmprof/__init__.py, to integrate better with fakevmprof diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -60,3 +60,23 @@ def start_sampling(): return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. 
+from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -216,20 +216,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): From pypy.commits at gmail.com Tue Nov 28 11:50:10 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:10 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: make check_status a real method, so that it can be tested and used also without the fixture Message-ID: <5a1d93c2.169a1c0a.63814.2120@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93196:d3c13697bffe Date: 2017-11-28 17:00 +0100 http://bitbucket.org/pypy/pypy/changeset/d3c13697bffe/ Log: make check_status a real method, so that it can be tested and used also without the fixture diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,4 +1,5 @@ import os +from rpython.rlib.rvmprof.test.support import fakevmprof from pypy.module._continuation.test.support import BaseAppTest diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -26,17 +26,20 @@ def is_sampling_enabled(self): return self._ignore_signals == 0 + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + @pytest.fixture def fakevmprof(request, monkeypatch): fake = FakeVMProf() monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) - # - def check_status(): - if fake._ignore_signals != 1: - msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' - 'got %d. 
This probably means that you called ' - '{start,stop}_sampling() a wrong number of times') - raise ValueError, msg % fake._ignore_signals - request.addfinalizer(check_status) + request.addfinalizer(fake.check_status) return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -22,6 +22,10 @@ # pytest.raises(AssertionError, "fake.start_sampling()") + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") class TestFixture(object): From pypy.commits at gmail.com Tue Nov 28 11:50:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:16 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: fix test_sampling_inside_callback by restarting sampling inside Message-ID: <5a1d93c8.0e97df0a.585f3.c1e3@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93199:44449d69030b Date: 2017-11-28 17:49 +0100 http://bitbucket.org/pypy/pypy/changeset/44449d69030b/ Log: fix test_sampling_inside_callback by restarting sampling inside new_stacklet_callback, after it was stopped inside switch(). This fix is a bit obscure because one part of start/stop is inside rstacklet.switch, while the other is in interp_continuation. OTOH, if we do the fix inside rstacklet we need to replicate it for every GcRootFinder, which is also obscure and a nightmare to test. Not sure which is the least ugly :( diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.executioncontext import ExecutionContext from pypy.interpreter.baseobjspace import W_Root @@ -222,12 +223,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self From pypy.commits at gmail.com Tue Nov 28 11:50:14 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:14 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: rename the fixture to app_fakevmprof and use it on all tests: this way, it automatically checks that {start, stop}_sampling has been called an even amount of times Message-ID: <5a1d93c6.42da1c0a.cfa52.fe60@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93198:a48521eb6944 Date: 2017-11-28 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/a48521eb6944/ Log: rename the fixture to app_fakevmprof and use it on all tests: this way, it automatically checks that {start,stop}_sampling has been called an even amount of times diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -4,7 +4,7 @@ import os +from rpython.rlib.rvmprof.test.support import fakevmprof from pypy.interpreter.gateway import interp2app from 
pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -41,6 +41,29 @@ # make sure that "self.stack" does not pass the self cls.w_stack = staticmethod(cls.w_stack.im_func) + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_new_empty(self): from _continuation import continulet # @@ -774,29 +797,6 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) - - at pytest.mark.usefixtures('init_method') -class AppTestVMProf(BaseAppTest): - - @pytest.fixture - def init_method(self, fakevmprof): - """ - This is automaticaly re-initialized for every method - """ - w = self.space.wrap - i2a = interp2app - def is_sampling_enabled(space): - return space.wrap(fakevmprof.is_sampling_enabled) - self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) - # - def start_sampling(space): - fakevmprof.start_sampling() - self.w_start_sampling = w(i2a(start_sampling)) - # - def stop_sampling(space): - fakevmprof.stop_sampling() - self.w_stop_sampling = w(i2a(stop_sampling)) - def test_sampling_inside_callback(self): from _continuation import continulet # @@ -804,10 +804,12 @@ assert self.is_sampling_enabled() return 42 # - self.start_sampling() - assert self.is_sampling_enabled() - c = continulet(my_callback) - res = c.switch() - assert res == 42 - assert self.is_sampling_enabled() - self.stop_sampling() + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() From pypy.commits at gmail.com Tue Nov 28 11:50:12 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:12 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: WIP: add a failing test which shows that we are not taking samples inside callbacks Message-ID: <5a1d93c4.8f9ddf0a.f99d6.4ed7@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93197:85cbd648e7dd Date: 2017-11-28 17:24 +0100 http://bitbucket.org/pypy/pypy/changeset/85cbd648e7dd/ Log: WIP: add a failing test which shows that we are not taking samples inside callbacks diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,5 +1,7 @@ +import pytest import os from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest @@ -771,3 +773,41 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) + + + at pytest.mark.usefixtures('init_method') +class AppTestVMProf(BaseAppTest): + + @pytest.fixture + def init_method(self, fakevmprof): + """ + This is 
automaticaly re-initialized for every method + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_sampling_inside_callback(self): + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + self.stop_sampling() diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -20,7 +20,7 @@ # --- FakeVMProf specific API --- # this API is not part of rvmprof, but available only inside tests using - # fakervmprof + # fakevmprof @property def is_sampling_enabled(self): From pypy.commits at gmail.com Tue Nov 28 12:09:00 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 28 Nov 2017 09:09:00 -0800 (PST) Subject: [pypy-commit] pypy default: win32 fixes, even msdb does not know if it should be vcvarsXX.bat or vsvarsXX.bat Message-ID: <5a1d982c.476b1c0a.0877.d3fc@mx.google.com> Author: Matti Picus Branch: Changeset: r93200:d402ee2877e6 Date: 2017-11-28 19:11 +0200 http://bitbucket.org/pypy/pypy/changeset/d402ee2877e6/ Log: win32 fixes, even msdb does not know if it should be vcvarsXX.bat or vsvarsXX.bat diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -52,10 +52,6 @@ vcvars = None try: toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] - if x64flag: - vcvars = os.path.join(toolsdir, "vcvarsamd64.bat") - else: - vcvars = os.path.join(toolsdir, 'vsvars32.bat') except KeyError: # try to import from the registry, as done in setuptools # XXX works for 90 but is it generalizable? 
@@ -69,7 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vsvars32.bat') import subprocess try: From pypy.commits at gmail.com Tue Nov 28 14:25:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:21 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Remove newunicode() and unicode_w() again Message-ID: <5a1db821.2a9ddf0a.3f404.a46b@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93201:26f1724ee623 Date: 2017-11-28 19:21 +0000 http://bitbucket.org/pypy/pypy/changeset/26f1724ee623/ Log: Remove newunicode() and unicode_w() again diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,10 +1760,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - def unicode_w(self, w_obj): - # XXX: kill me! - return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -27,7 +27,8 @@ w_newline=space.newtext(mode)) lines = [] while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.utf8_w(w_line).decode('utf-8') if limit > 0: assert len(line) <= limit if line: diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -377,13 +377,6 @@ assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Tue Nov 28 14:25:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:23 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: close branch before merging Message-ID: <5a1db823.90a9df0a.93d36.0bb2@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93202:a4e5720003bb Date: 2017-11-28 19:22 +0000 http://bitbucket.org/pypy/pypy/changeset/a4e5720003bb/ Log: close branch before merging From pypy.commits at gmail.com Tue Nov 28 14:25:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Merge branch 'utf8-io': fix the _io module Message-ID: <5a1db825.3bb0df0a.d4ca0.74bc@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93203:290c2d5ff0bb Date: 2017-11-28 19:23 +0000 http://bitbucket.org/pypy/pypy/changeset/290c2d5ff0bb/ Log: Merge branch 'utf8-io': fix the _io module diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -17,20 +17,20 @@ if len(self.data) > newlength: self.data = self.data[:newlength] if len(self.data) < newlength: - self.data.extend([u'\0'] * (newlength - len(self.data))) + self.data.extend(['\0'] * (newlength - len(self.data))) def read(self, size): start = self.pos available = len(self.data) - start if available <= 0: - return u'' + return '' if size >= 0 and size <= available: end = start + size else: end = len(self.data) assert 0 <= start <= end self.pos = end - return u''.join(self.data[start:end]) + return ''.join(self.data[start:end]) def _convert_limit(self, limit): if limit < 0 or limit > len(self.data) - self.pos: @@ -58,7 +58,7 @@ else: break self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def readline(self, marker, limit): @@ -79,7 +79,7 @@ if not found: pos = end self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def write(self, string): @@ -99,7 +99,7 @@ self.resize(size) def getvalue(self): - return u''.join(self.data) + return ''.join(self.data) class W_StringIO(W_TextIOBase): @@ -118,10 +118,10 @@ if space.is_w(w_newline, space.w_None): newline = None else: - newline = space.unicode_w(w_newline) + newline = space.utf8_w(w_newline) - if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + if (newline is not None and newline != "" and newline != "\n" and + newline != "\r" and newline != "\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -131,9 +131,9 @@ ) if newline is not None: self.readnl = newline - self.readuniversal = newline is None or newline == u"" + self.readuniversal = newline is None or newline == "" self.readtranslate = newline is None - if newline and newline[0] == u"\r": + if newline and newline[0] == "\r": self.writenl = newline if self.readuniversal: self.w_decoder = space.call_function( @@ -152,7 +152,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY return 
space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -179,7 +179,7 @@ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely - initval = space.unicode_w(w_initval) + initval = space.utf8_w(w_initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, @@ -215,8 +215,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.newunicode(self.writenl)) - string = space.unicode_w(w_decoded) + space.newtext("\n"), space.new_from_utf8(self.writenl)) + string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +225,7 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.newunicode(self.buf.read(size)) + return space.new_from_utf8(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -235,11 +235,11 @@ else: if self.readtranslate: # Newlines are already translated, only search for \n - newline = u'\n' + newline = '\n' else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.newunicode(result) + return space.new_from_utf8(result) @unwrap_spec(pos=int, mode=int) @@ -276,7 +276,7 @@ def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(self.buf.getvalue()) + return space.new_from_utf8(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 +from rpython.rlib.rutf8 import ( + FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -303,7 +304,7 @@ def set(self, space, w_decoded): check_decoded(space, w_decoded) - self.text = space.unicode_w(w_decoded) + self.text = space.utf8_w(w_decoded) self.pos = 0 def reset(self): @@ -312,7 +313,7 @@ def get_chars(self, size): if self.text is None: - return u"" + return "" available = len(self.text) - self.pos if size < 0 or size > available: @@ -341,7 +342,7 @@ if self.exhausted(): raise StopIteration ch = self.text[self.pos] - self.pos += 1 + self.pos = next_codepoint_pos(self.text, self.pos) return ch def peek_char(self): @@ -362,16 +363,16 @@ ch = self.next_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': return True - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: ch = self.peek_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': self.next_char() return True else: @@ -388,11 +389,11 @@ except StopIteration: return False scanned += 1 - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: - if self.peek_char() == u'\n': + if self.peek_char() == '\n': self.next_char() return True except StopIteration: @@ -420,6 +421,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -705,11 +707,11 @@ else: if self.readtranslate: # Newlines are already translated, 
only search for \n - newline = u'\n' + newline = '\n' else: # Non-universal mode. newline = self.readnl - if newline == u'\r\n': + if newline == '\r\n': return self.decoded.find_crlf(limit) else: return self.decoded.find_char(newline[0], limit) @@ -945,13 +947,14 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded.pos = cookie.chars_to_skip + self.decoded.set(space, w_decoded) + self.decoded.pos = w_decoded._index_to_byte(cookie.chars_to_skip) else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -963,10 +966,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -992,7 +993,8 @@ # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded.pos + chars_to_skip = codepoints_in_utf8( + self.decoded.text, end=self.decoded.pos) # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. @@ -1036,14 +1038,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -27,7 +27,8 @@ w_newline=space.newtext(mode)) lines = [] while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.utf8_w(w_line).decode('utf-8') if limit > 0: assert len(line) <= limit if line: @@ -38,31 +39,27 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text) - assert buf.get_chars(-1) == text + buf = DecodeBuffer(text.encode('utf-8')) + assert buf.get_chars(-1) == text.encode('utf-8') assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf-8')) strings = [] for n in sizes: s = buf.get_chars(n) if not buf.exhausted(): - assert len(s) == n + assert len(s.decode('utf-8')) == n else: - assert len(s) <= n + assert len(s.decode('utf-8')) <= n strings.append(s) - assert ''.join(strings) == text[:sum(sizes)] + assert ''.join(strings) == text[:sum(sizes)].encode('utf-8') @given(st.text()) def test_next_char(text): - buf = DecodeBuffer(text) - chars = [] - try: - while True: - chars.append(buf.next_char()) - except StopIteration: - pass + buf = DecodeBuffer(text.encode('utf-8')) + for i in range(len(text)): + ch = 
buf.next_char() + assert ch == text[i].encode('utf-8')[0] assert buf.exhausted() - assert u''.join(chars) == text diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py deleted file mode 100644 --- a/pypy/module/_io/test/test_ztranslation.py +++ /dev/null @@ -1,4 +0,0 @@ -from pypy.objspace.fake.checkmodule import checkmodule - -def test_checkmodule(): - checkmodule('_io') diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,6 +212,12 @@ def newutf8(self, x, l, f): return w_some_obj() + def new_from_utf8(self, a): + return w_some_obj() + + def newunicode(self, a): + return w_some_obj() + newtext = newbytes newtext_or_none = newbytes newfilename = newbytes diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,6 +367,12 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding From pypy.commits at gmail.com Tue Nov 28 14:29:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:29:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: shut up test_whatsnew Message-ID: <5a1db926.8bc4df0a.7daed.2181@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93204:49f614a4e075 Date: 2017-11-28 19:27 +0000 http://bitbucket.org/pypy/pypy/changeset/49f614a4e075/ Log: shut up test_whatsnew diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -14,3 +14,5 @@ .. branch: py3.5-mac-embedding Download and patch dependencies when building cffi-based stdlib modules + +.. 
branch: os_lockf From pypy.commits at gmail.com Tue Nov 28 17:13:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 14:13:21 -0800 (PST) Subject: [pypy-commit] pypy default: Improve test_textio so that it detects the current issues Message-ID: <5a1ddf81.83b91c0a.9bd6f.5b8c@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93205:47f75e26f6cf Date: 2017-11-28 22:11 +0000 http://bitbucket.org/pypy/pypy/changeset/47f75e26f6cf/ Log: Improve test_textio so that it detects the current issues diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) From pypy.commits at gmail.com Wed Nov 29 03:10:49 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:10:49 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: maybe fix? Message-ID: <5a1e6b89.54d91c0a.1b0ff.5d39@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93206:7f64b9246539 Date: 2017-11-29 10:13 +0200 http://bitbucket.org/pypy/pypy/changeset/7f64b9246539/ Log: maybe fix? 
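The fix being attempted here concerns which vcvars/vsvars batch file gets run and how the Visual Studio version is guessed from the MSC compiler version recorded in sys.version. A small worked example of that mapping (the helper name is made up; the formula is the one that appears in the patch below):

    # Illustration only: derive the VS%sCOMNTOOLS version number from the
    # "MSC v.NNNN" marker in sys.version.
    import sys

    def guess_vsver():
        msc_pos = sys.version.find('MSC v.')
        if msc_pos == -1:
            return None
        msc_ver = int(sys.version[msc_pos + 6:msc_pos + 10])
        # e.g. MSC v.1500 (VS 2008): (1500 / 10) - 60 == 90
        return (msc_ver / 10) - 60

Note that for MSC v.1900 (VS 2015) this formula yields 130 rather than 140, so such a build is matched by the explicit 140 entry in the hard-coded fallback list rather than by the computed value. The patch itself follows: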
diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -65,12 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vsvars32.bat') if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # wich names both - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -92,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 - env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: + env, errstr = _get_msvc_env(vsver, x64flag) if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:22:37 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:22:37 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: typo Message-ID: <5a1e6e4d.90a9df0a.6473d.8b41@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93207:5d23987c65b3 Date: 2017-11-29 10:25 +0200 http://bitbucket.org/pypy/pypy/changeset/5d23987c65b3/ Log: typo diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -106,7 +106,7 @@ vcvers.insert(0, vsver) errs = [] for vsver in vcvers: - env, errstr = _get_msvc_env(vsver, x64flag) + env = _get_msvc_env(vsver, x64flag) if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:42:47 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:42:47 -0800 (PST) Subject: [pypy-commit] pypy default: merge win32-vcvars, log more and try vsvars32 before vcvars32, go figure Message-ID: <5a1e7307.47b0df0a.7a070.2da6@mx.google.com> Author: Matti Picus Branch: Changeset: r93209:d1aaa6aca19d Date: 2017-11-29 10:41 +0200 http://bitbucket.org/pypy/pypy/changeset/d1aaa6aca19d/ Log: merge win32-vcvars, log more and try vsvars32 before vcvars32, go figure diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -65,12 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vsvars32.bat') if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # wich names both - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -92,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:42:45 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:42:45 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: both build slaves seem happy Message-ID: <5a1e7305.8b951c0a.2f62d.7b36@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93208:b6119a8ea747 Date: 2017-11-29 10:40 +0200 http://bitbucket.org/pypy/pypy/changeset/b6119a8ea747/ Log: both build slaves seem happy diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + From pypy.commits at gmail.com Wed Nov 29 09:34:19 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 29 Nov 2017 06:34:19 -0800 (PST) Subject: [pypy-commit] pypy default: - hypothesis tests for the IntBound methods Message-ID: <5a1ec56b.42b2df0a.cc414.6229@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93210:a805f563cfd0 Date: 2017-11-29 15:33 +0100 http://bitbucket.org/pypy/pypy/changeset/a805f563cfd0/ Log: - hypothesis tests for the IntBound methods - lighgly refactor optimization of some int_ ops to make hypothesis testing possible diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) 
self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,31 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +334,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +349,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds 
import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, 
t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) From pypy.commits at gmail.com Wed Nov 29 10:20:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:26 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: add a test which checks that vmprof is actually enabled inside greenlets; it fails on default and passes on this branch Message-ID: <5a1ed03a.47b0df0a.7a070.d0d1@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93213:9ac249e058e3 Date: 2017-11-29 16:17 +0100 http://bitbucket.org/pypy/pypy/changeset/9ac249e058e3/ Log: add a test which checks that vmprof is actually enabled inside greenlets; it fails on default and passes on this branch diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis +vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +import vmprof + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git 
a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -799,6 +799,9 @@ def test_sampling_inside_callback(self): if self.runappdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test skip("we can't run this until we have _vmprof.is_sampling_enabled") from _continuation import continulet # From pypy.commits at gmail.com Wed Nov 29 10:20:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:22 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: skip this when run with -A Message-ID: <5a1ed036.9085df0a.fe17c.b828@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93211:37b5a39510e7 Date: 2017-11-29 11:25 +0100 http://bitbucket.org/pypy/pypy/changeset/37b5a39510e7/ Log: skip this when run with -A diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -798,6 +798,8 @@ raises(error, continulet.switch, c1, to=c2) def test_sampling_inside_callback(self): + if self.runappdirect: + skip("we can't run this until we have _vmprof.is_sampling_enabled") from _continuation import continulet # def my_callback(c1): From pypy.commits at gmail.com Wed Nov 29 10:20:28 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:28 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: document this branch Message-ID: <5a1ed03c.c7a4df0a.6e59d.622d@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93214:f7fa5a2b8f51 Date: 2017-11-29 16:19 +0100 http://bitbucket.org/pypy/pypy/changeset/f7fa5a2b8f51/ Log: document this branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -25,4 +25,6 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) +.. branch: fix-vmprof-stacklet-switch-2 +Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...) + From pypy.commits at gmail.com Wed Nov 29 10:20:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:24 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: simplify Message-ID: <5a1ed038.0abadf0a.a9a6c.752d@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93212:4776dc8a84f7 Date: 2017-11-29 16:16 +0100 http://bitbucket.org/pypy/pypy/changeset/4776dc8a84f7/ Log: simplify diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read nonsense. 
""" - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() From pypy.commits at gmail.com Wed Nov 29 13:34:12 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 29 Nov 2017 10:34:12 -0800 (PST) Subject: [pypy-commit] pypy default: Improve interp-level test and fix bugs in W_TextIOWrapper.readline_w() Message-ID: <5a1efda4.0e97df0a.8f776.4026@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93215:b2e4b128808e Date: 2017-11-29 18:32 +0000 http://bitbucket.org/pypy/pypy/changeset/b2e4b128808e/ Log: Improve interp-level test and fix bugs in W_TextIOWrapper.readline_w() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -353,6 +353,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -737,7 +738,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,53 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): From pypy.commits at gmail.com Wed Nov 29 13:36:01 2017 From: 
pypy.commits at gmail.com (rlamy) Date: Wed, 29 Nov 2017 10:36:01 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a1efe11.c23a1c0a.8ea05.4bab@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93216:fcf5d3fb56f4 Date: 2017-11-29 18:34 +0000 http://bitbucket.org/pypy/pypy/changeset/fcf5d3fb56f4/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. 
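A minimal sketch of that lookup, for illustration only (it is not taken from the diff above; it assumes CPython 2.7 on Windows, where the stock ``distutils`` consults the registry and the ``VS90COMNTOOLS`` variable, and a current ``setuptools`` extends the same search to the per-user install location)::

    # Ask distutils where the MSVC 9.0 environment script lives.
    # find_vcvarsall() returns the path to vcvarsall.bat, or None if no
    # suitable Visual C++ 9.0 installation could be located.
    from distutils.msvc9compiler import find_vcvarsall

    vcvarsall = find_vcvarsall(9.0)
    if vcvarsall is None:
        print "MSVC 9.0 not found: install the compiler or set VS90COMNTOOLS"
    else:
        print "found vcvarsall.bat at", vcvarsall

If this prints a path, ``distutils``-driven builds (and hence the cffi module builds discussed below) should be able to pick up the compiler without further configuration.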
For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -368,6 +368,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -780,7 +781,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,53 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', 
u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - 
r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,31 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +334,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +349,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if 
self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def 
test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, 
'--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -74,6 +66,11 @@ vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: vcvars = os.path.join(toolsdir, 'vsvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -95,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Thu Nov 30 11:34:02 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:34:02 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: A branch to try to use mmap() instead of malloc() for arenas from the GC Message-ID: <5a2032fa.c7a4df0a.6e59d.4e94@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93217:75f5a5c594a8 Date: 2017-11-30 16:54 +0100 http://bitbucket.org/pypy/pypy/changeset/75f5a5c594a8/ Log: A branch to try to use mmap() instead of malloc() for arenas from the GC From pypy.commits at gmail.com Thu Nov 30 11:34:05 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:34:05 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: (fijal, arigo) Message-ID: <5a2032fd.8b951c0a.2f62d.aa35@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93218:6412ce4e1198 Date: 2017-11-30 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/6412ce4e1198/ Log: (fijal, arigo) Trying to use mmap() to allocate arenas diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -292,7 +292,7 @@ # # 'arena_base' points to the start of malloced memory; it might not # be a page-aligned address - arena_base = llarena.arena_malloc(self.arena_size, False) + arena_base = llarena.arena_mmap(self.arena_size) if not arena_base: out_of_memory("out of memory: couldn't allocate the next arena") arena_end = arena_base + self.arena_size @@ -395,8 +395,7 @@ if arena.nfreepages == arena.totalpages: # # The whole arena is empty. Free it. 
- llarena.arena_reset(arena.base, self.arena_size, 4) - llarena.arena_free(arena.base) + llarena.arena_munmap(arena.base, self.arena_size) lltype.free(arena, flavor='raw', track_allocation=False) # else: diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -327,6 +327,16 @@ assert not arena_addr.arena.objectptrs arena_addr.arena.mark_freed() +def arena_mmap(nbytes): + """Allocate and return a new arena, zero-initialized by the + system, calling mmap().""" + return arena_malloc(nbytes, True) + +def arena_munmap(arena_addr): + """Release an arena allocated with arena_mmap().""" + arena_free(arena_addr) + + def arena_reset(arena_addr, size, zero): """Free all objects in the arena, which can then be reused. This can also be used on a subrange of the arena. @@ -530,6 +540,30 @@ llfakeimpl=arena_free, sandboxsafe=True) +def llimpl_arena_mmap(nbytes): + from rpython.rlib import rmmap + flags = rmmap.MAP_PRIVATE | rmmap.MAP_ANONYMOUS + prot = rmmap.PROT_READ | rmmap.PROT_WRITE + p = rffi.cast(llmemory.Address, rmmap.c_mmap_safe( + lltype.nullptr(rmmap.PTR.TO), nbytes, prot, flags, -1, 0)) + if p == rffi.cast(llmemory.Address, -1): + p = rffi.cast(llmemory.Address, 0) + return p +register_external(arena_mmap, [int], llmemory.Address, + 'll_arena.arena_mmap', + llimpl=llimpl_arena_mmap, + llfakeimpl=arena_mmap, + sandboxsafe=True) + +def llimpl_arena_munmap(arena_addr, nbytes): + from rpython.rlib import rmmap + rmmap.c_munmap_safe(rffi.cast(rmmap.PTR, arena_addr), nbytes) +register_external(arena_munmap, [llmemory.Address, int], None, + 'll_arena.arena_munmap', + llimpl=llimpl_arena_munmap, + llfakeimpl=arena_munmap, + sandboxsafe=True) + def llimpl_arena_reset(arena_addr, size, zero): if zero: if zero == 1: From pypy.commits at gmail.com Thu Nov 30 11:39:34 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:39:34 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: test fix Message-ID: <5a203446.6b88df0a.a6cc5.4856@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93219:4c75975c98db Date: 2017-11-30 17:37 +0100 http://bitbucket.org/pypy/pypy/changeset/4c75975c98db/ Log: test fix diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -332,9 +332,10 @@ system, calling mmap().""" return arena_malloc(nbytes, True) -def arena_munmap(arena_addr): +def arena_munmap(arena_addr, nbytes): """Release an arena allocated with arena_mmap().""" arena_free(arena_addr) + assert nbytes == arena_addr.arena.nbytes def arena_reset(arena_addr, size, zero): From pypy.commits at gmail.com Thu Nov 30 11:45:03 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:03 -0800 (PST) Subject: [pypy-commit] pypy default: backout changes that broke translation in unclear ways (thanks RPython) Message-ID: <5a20358f.06b6df0a.a3bf2.b09f@mx.google.com> Author: fijal Branch: Changeset: r93220:30c6fda0a499 Date: 2017-11-30 18:38 +0200 http://bitbucket.org/pypy/pypy/changeset/30c6fda0a499/ Log: backout changes that broke translation in unclear ways (thanks RPython) diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,6 +25,19 @@ return (1 << 
((byte_size << 3) - 1)) - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -37,7 +50,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the result + # FIXME: This takes care of the instruction where box is the reuslt # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -78,8 +91,14 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - b = b1.or_bound(b2) - self.getintbound(op).intersect(b) + if b1.known_ge(IntBound(0, 0)) and \ + b2.known_ge(IntBound(0, 0)): + r = self.getintbound(op) + if b1.has_upper and b2.has_upper: + mostsignificant = b1.upper | b2.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -93,8 +112,15 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - b = b1.and_bound(b2) - self.getintbound(op).intersect(b) + r = self.getintbound(op) + pos1 = b1.known_ge(IntBound(0, 0)) + pos2 = b2.known_ge(IntBound(0, 0)) + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(b1) + if pos2: + r.make_le(b2) def optimize_INT_SUB(self, op): return self.emit(op) @@ -185,10 +211,16 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): - b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - r = self.getintbound(op) - r.intersect(b1.mod_bound(b2)) + if b2.is_constant(): + val = b2.getint() + r = self.getintbound(op) + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -404,7 +436,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_nonnegative(): + if b.known_ge(IntBound(0, 0)): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -615,7 +647,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_nonnegative(): + if b1.known_ge(IntBound(0, 0)): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,19 +12,6 @@ MAXINT = maxint MININT = -maxint - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -105,9 +92,6 @@ def known_ge(self, other): return other.known_le(self) - def known_nonnegative(self): - return self.has_lower and 0 <= self.lower - def intersect(self, other): r = False @@ -208,22 +192,10 
@@ else: return IntUnbounded() - def mod_bound(self, other): - r = IntUnbounded() - if other.is_constant(): - val = other.getint() - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) - return r - def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -239,7 +211,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -249,31 +221,7 @@ else: return IntUnbounded() - def and_bound(self, other): - pos1 = self.known_nonnegative() - pos2 = other.known_nonnegative() - r = IntUnbounded() - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(self) - if pos2: - r.make_le(other) - return r - - def or_bound(self, other): - r = IntUnbounded() - if self.known_nonnegative() and \ - other.known_nonnegative(): - if self.has_upper and other.has_upper: - mostsignificant = self.upper | other.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) - return r - def contains(self, val): - assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -334,7 +282,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_nonnegative() and + return (self.bounded() and self.known_ge(ConstIntBound(0)) and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -349,7 +297,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_nonnegative() and \ + if self.known_ge(IntBound(0, 0)) and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,34 +1,12 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded, next_pow2_m1 + IntLowerBound, IntUnbounded +from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck +from rpython.rlib.rarithmetic import LONG_BIT -from hypothesis import given, strategies - -special_values = ( - range(-100, 100) + - [2 ** i for i in range(1, LONG_BIT)] + - [-2 ** i for i in range(1, LONG_BIT)] + - [2 ** i - 1 for i in range(1, LONG_BIT)] + - [-2 ** i - 1 for i in range(1, LONG_BIT)] + - [2 ** i + 1 for i in range(1, LONG_BIT)] + - [-2 ** i + 1 for i in range(1, LONG_BIT)] + - [sys.maxint, -sys.maxint-1]) - -special_values = strategies.sampled_from( - [int(v) for v in special_values if type(int(v)) is int]) - -ints = strategies.builds( - int, # strategies.integers sometimes returns a long? 
- special_values | strategies.integers( - min_value=int(-sys.maxint-1), max_value=sys.maxint)) - -ints_or_none = strategies.none() | ints - - -def bound(a, b): +def bound(a,b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -36,55 +14,11 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a, b) + return IntBound(a,b) def const(a): return bound(a,a) - -def build_bound_with_contained_number(a, b, c): - a, b, c = sorted([a, b, c]) - r = bound(a, c) - assert r.contains(b) - return r, b - -bound_with_contained_number = strategies.builds( - build_bound_with_contained_number, - ints_or_none, - ints_or_none, - ints -) - -unbounded = strategies.builds( - lambda x: (bound(None, None), int(x)), - ints -) - -lower_bounded = strategies.builds( - lambda x, y: (bound(min(x, y), None), max(x, y)), - ints, - ints -) - -upper_bounded = strategies.builds( - lambda x, y: (bound(None, max(x, y)), min(x, y)), - ints, - ints -) - -bounded = strategies.builds( - build_bound_with_contained_number, - ints, ints, ints -) - -constant = strategies.builds( - lambda x: (const(x), x), - ints -) - -bound_with_contained_number = strategies.one_of( - unbounded, lower_bounded, upper_bounded, constant, bounded) - def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -306,6 +240,8 @@ def test_div_bound(): + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -325,15 +261,6 @@ assert a.contains(-3) assert a.contains(0) -def test_mod_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.mod_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -348,25 +275,6 @@ assert not a.contains(-1) assert not a.contains(4) -def test_and_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.and_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 & n2) - -def test_or_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.or_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 | n2) - assert b3.contains(n1 ^ n2) # we use it for xor too - def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -377,82 +285,3 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 - - - at given(bound_with_contained_number, bound_with_contained_number) -def test_add_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.add_bound(b2) - try: - r = ovfcheck(n1 + n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_sub_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.sub_bound(b2) - try: - r = ovfcheck(n1 - n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mul_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mul_bound(b2) - try: - r = ovfcheck(n1 * n2) - except OverflowError: - assert not b3.bounded() - else: - assert 
b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_div_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.py_div_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 / n2) # Python-style div - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mod_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mod_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style mod - - at given(bound_with_contained_number, bound_with_contained_number) -def test_and_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.and_bound(b2) - r = n1 & n2 - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_or_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.or_bound(b2) - r = n1 | n2 - assert b3.contains(r) - r = n1 ^ n2 - assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 11:45:05 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:05 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: merge default Message-ID: <5a203591.13811c0a.a506f.122f@mx.google.com> Author: fijal Branch: mmap-for-arenas Changeset: r93221:6fb9f1a724da Date: 2017-11-30 18:38 +0200 http://bitbucket.org/pypy/pypy/changeset/6fb9f1a724da/ Log: merge default diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,6 +25,19 @@ return (1 << ((byte_size << 3) - 1)) - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -37,7 +50,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the result + # FIXME: This takes care of the instruction where box is the reuslt # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -78,8 +91,14 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - b = b1.or_bound(b2) - self.getintbound(op).intersect(b) + if b1.known_ge(IntBound(0, 0)) and \ + b2.known_ge(IntBound(0, 0)): + r = self.getintbound(op) + if b1.has_upper and b2.has_upper: + mostsignificant = b1.upper | b2.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -93,8 +112,15 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - b = b1.and_bound(b2) - self.getintbound(op).intersect(b) + r = self.getintbound(op) + pos1 = b1.known_ge(IntBound(0, 0)) + pos2 = b2.known_ge(IntBound(0, 0)) + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(b1) + if pos2: + r.make_le(b2) def optimize_INT_SUB(self, op): return self.emit(op) @@ -185,10 +211,16 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): - b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - r = 
self.getintbound(op) - r.intersect(b1.mod_bound(b2)) + if b2.is_constant(): + val = b2.getint() + r = self.getintbound(op) + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -404,7 +436,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_nonnegative(): + if b.known_ge(IntBound(0, 0)): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -615,7 +647,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_nonnegative(): + if b1.known_ge(IntBound(0, 0)): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,19 +12,6 @@ MAXINT = maxint MININT = -maxint - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -105,9 +92,6 @@ def known_ge(self, other): return other.known_le(self) - def known_nonnegative(self): - return self.has_lower and 0 <= self.lower - def intersect(self, other): r = False @@ -208,22 +192,10 @@ else: return IntUnbounded() - def mod_bound(self, other): - r = IntUnbounded() - if other.is_constant(): - val = other.getint() - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) - return r - def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -239,7 +211,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -249,31 +221,7 @@ else: return IntUnbounded() - def and_bound(self, other): - pos1 = self.known_nonnegative() - pos2 = other.known_nonnegative() - r = IntUnbounded() - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(self) - if pos2: - r.make_le(other) - return r - - def or_bound(self, other): - r = IntUnbounded() - if self.known_nonnegative() and \ - other.known_nonnegative(): - if self.has_upper and other.has_upper: - mostsignificant = self.upper | other.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) - return r - def contains(self, val): - assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -334,7 +282,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and 
self.known_nonnegative() and + return (self.bounded() and self.known_ge(ConstIntBound(0)) and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -349,7 +297,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_nonnegative() and \ + if self.known_ge(IntBound(0, 0)) and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,34 +1,12 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded, next_pow2_m1 + IntLowerBound, IntUnbounded +from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck +from rpython.rlib.rarithmetic import LONG_BIT -from hypothesis import given, strategies - -special_values = ( - range(-100, 100) + - [2 ** i for i in range(1, LONG_BIT)] + - [-2 ** i for i in range(1, LONG_BIT)] + - [2 ** i - 1 for i in range(1, LONG_BIT)] + - [-2 ** i - 1 for i in range(1, LONG_BIT)] + - [2 ** i + 1 for i in range(1, LONG_BIT)] + - [-2 ** i + 1 for i in range(1, LONG_BIT)] + - [sys.maxint, -sys.maxint-1]) - -special_values = strategies.sampled_from( - [int(v) for v in special_values if type(int(v)) is int]) - -ints = strategies.builds( - int, # strategies.integers sometimes returns a long? - special_values | strategies.integers( - min_value=int(-sys.maxint-1), max_value=sys.maxint)) - -ints_or_none = strategies.none() | ints - - -def bound(a, b): +def bound(a,b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -36,55 +14,11 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a, b) + return IntBound(a,b) def const(a): return bound(a,a) - -def build_bound_with_contained_number(a, b, c): - a, b, c = sorted([a, b, c]) - r = bound(a, c) - assert r.contains(b) - return r, b - -bound_with_contained_number = strategies.builds( - build_bound_with_contained_number, - ints_or_none, - ints_or_none, - ints -) - -unbounded = strategies.builds( - lambda x: (bound(None, None), int(x)), - ints -) - -lower_bounded = strategies.builds( - lambda x, y: (bound(min(x, y), None), max(x, y)), - ints, - ints -) - -upper_bounded = strategies.builds( - lambda x, y: (bound(None, max(x, y)), min(x, y)), - ints, - ints -) - -bounded = strategies.builds( - build_bound_with_contained_number, - ints, ints, ints -) - -constant = strategies.builds( - lambda x: (const(x), x), - ints -) - -bound_with_contained_number = strategies.one_of( - unbounded, lower_bounded, upper_bounded, constant, bounded) - def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -306,6 +240,8 @@ def test_div_bound(): + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -325,15 +261,6 @@ assert a.contains(-3) assert a.contains(0) -def test_mod_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.mod_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -348,25 +275,6 @@ assert not 
a.contains(-1) assert not a.contains(4) -def test_and_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.and_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 & n2) - -def test_or_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.or_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 | n2) - assert b3.contains(n1 ^ n2) # we use it for xor too - def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -377,82 +285,3 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 - - - at given(bound_with_contained_number, bound_with_contained_number) -def test_add_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.add_bound(b2) - try: - r = ovfcheck(n1 + n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_sub_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.sub_bound(b2) - try: - r = ovfcheck(n1 - n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mul_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mul_bound(b2) - try: - r = ovfcheck(n1 * n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_div_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.py_div_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 / n2) # Python-style div - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mod_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mod_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style mod - - at given(bound_with_contained_number, bound_with_contained_number) -def test_and_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.and_bound(b2) - r = n1 & n2 - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_or_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.or_bound(b2) - r = n1 | n2 - assert b3.contains(r) - r = n1 ^ n2 - assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 11:45:07 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:07 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: merge Message-ID: <5a203593.8283df0a.d8245.0f49@mx.google.com> Author: fijal Branch: mmap-for-arenas Changeset: r93222:2e594f3e5237 Date: 2017-11-30 18:44 +0200 http://bitbucket.org/pypy/pypy/changeset/2e594f3e5237/ Log: merge diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -332,9 +332,10 @@ system, calling mmap().""" return arena_malloc(nbytes, True) -def arena_munmap(arena_addr): +def arena_munmap(arena_addr, nbytes): """Release an arena allocated with arena_mmap().""" arena_free(arena_addr) + assert nbytes == arena_addr.arena.nbytes def arena_reset(arena_addr, size, zero): From pypy.commits at gmail.com 
Thu Nov 30 11:49:31 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:49:31 -0800 (PST) Subject: [pypy-commit] pypy default: Re-revert 30c6fda0a499, and add the proper fix, hopefully Message-ID: <5a20369b.b198df0a.958c3.16c4@mx.google.com> Author: Armin Rigo Branch: Changeset: r93223:e6c7a428f649 Date: 2017-11-30 17:48 +0100 http://bitbucket.org/pypy/pypy/changeset/e6c7a428f649/ Log: Re-revert 30c6fda0a499, and add the proper fix, hopefully diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py 
b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from 
rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = 
ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 12:18:36 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 09:18:36 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: translation fix Message-ID: <5a203d6c.020a1c0a.c7afa.edad@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93224:40ad6dbda37b Date: 2017-11-30 18:18 +0100 http://bitbucket.org/pypy/pypy/changeset/40ad6dbda37b/ Log: translation fix diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -558,6 +558,7 @@ def llimpl_arena_munmap(arena_addr, nbytes): from rpython.rlib import rmmap + assert nbytes >= 0 rmmap.c_munmap_safe(rffi.cast(rmmap.PTR, arena_addr), nbytes) register_external(arena_munmap, [llmemory.Address, int], None, 'll_arena.arena_munmap', From pypy.commits at gmail.com Thu Nov 30 13:49:52 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 10:49:52 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: merge mmap-for-llarena Message-ID: <5a2052d0.e4a6df0a.26087.3d34@mx.google.com> Author: fijal Branch: memory-accounting Changeset: r93225:5ba0bf0bf684 Date: 2017-11-30 20:49 +0200 http://bitbucket.org/pypy/pypy/changeset/5ba0bf0bf684/ Log: merge mmap-for-llarena diff too long, truncating to 2000 out of 18247 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -40,3 +40,7 @@ 2875f328eae2216a87f3d6f335092832eb031f56 release-pypy3.5-v5.7.1 c925e73810367cd960a32592dd7f728f436c125c release-pypy2.7-v5.8.0 a37ecfe5f142bc971a86d17305cc5d1d70abec64 
release-pypy3.5-v5.8.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def 
test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,48 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper +import os + +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) + + at st.composite +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) + + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) + lines = [] + for limit in limits: + line = textio.readline(limit) + if limit >= 0: + assert len(line) <= limit + if line: + lines.append(line) + elif limit: + break + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, 
end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. 
CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif 
subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', 
'_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1028,21 +1028,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.11.1 +Version: 1.11.2 Summary: Foreign Function Interface for Python calling C code. Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.11.1" -__version_info__ = (1, 11, 1) +__version__ = "1.11.2" +__version_info__ = (1, 11, 2) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. 
It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -238,9 +238,9 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else - return _cffi_from_c_wchar3216_t(x); + return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) @@ -254,7 +254,7 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -247,7 +247,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.11.1" + "\ncompiled with cffi version: 1.11.2" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -119,7 +119,7 @@ To run untranslated tests, you need the Boehm garbage collector libgc. -On recent Debian and Ubuntu (like 17.04), this is the command to install +On recent Debian and Ubuntu (16.04 onwards), this is the command to install all build-time dependencies:: apt-get install gcc make libffi-dev pkg-config zlib1g-dev libbz2-dev \ @@ -127,7 +127,7 @@ tk-dev libgc-dev python-cffi \ liblzma-dev libncursesw5-dev # these two only needed on PyPy3 -On older Debian and Ubuntu (12.04 to 16.04):: +On older Debian and Ubuntu (12.04-14.04):: apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \ libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \ @@ -149,12 +149,23 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X, most of these build-time dependencies are installed alongside +On Mac OS X:: + +Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to find them you may need to run:: xcode-select --install +An exception is OpenSSL, which is no longer provided with the operating +system. It can be obtained via Homebrew (with ``$ brew install openssl``), +but it will not be available on the system path by default. The easiest +way to enable it for building pypy is to set an environment variable:: + + export PKG_CONFIG_PATH=$(brew --prefix)/opt/openssl/lib/pkgconfig + +After setting this, translation (described next) will find the OpenSSL libs +as expected. Run the translation ------------------- @@ -187,18 +198,18 @@ entire pypy interpreter. This step is currently singe threaded, and RAM hungry. As part of this step, the chain creates a large number of C code files and a Makefile to compile them in a - directory controlled by the ``PYPY_USESSION_DIR`` environment variable. + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. 2. Create an executable ``pypy-c`` by running the Makefile. This step can - utilize all possible cores on the machine. -3. Copy the needed binaries to the current directory. -4. Generate c-extension modules for any cffi-based stdlib modules. + utilize all possible cores on the machine. +3. 
Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. The resulting executable behaves mostly like a normal Python interpreter (see :doc:`cpython_differences`), and is ready for testing, for use as a base interpreter for a new virtualenv, or for packaging into a binary suitable for installation on another machine running the same OS as the build -machine. +machine. Note that step 4 is merely done as a convenience, any of the steps may be rerun without rerunning the previous steps. @@ -255,7 +266,7 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in + commands at install time; the exact list is in :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. 
_`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/release-v5.9.0.rst b/pypy/doc/release-v5.9.0.rst --- a/pypy/doc/release-v5.9.0.rst +++ b/pypy/doc/release-v5.9.0.rst @@ -10,18 +10,24 @@ This new PyPy2.7 release includes the upstream stdlib version 2.7.13, and PyPy3.5 includes the upstream stdlib version 3.5.3. -Only a handful of failing tests remain in NumPy and Pandas on PyPy2.7, issues -that appeared as excessive memory use were cleared up and other incompatibilities -were resolved. +NumPy and Pandas now work on PyPy2.7 (together with Cython 0.27.1). Issues +that appeared as excessive memory +use were cleared up and other incompatibilities were resolved. The C-API +compatibility layer does slow down code which crosses the python-c interface +often, we have ideas on how it could be improved, and still recommend +using pure python on PyPy or interfacing via CFFI_. Many other modules +based on C-API exentions now work on PyPy as well. -Cython 0.27 (released last week) should support more projects with PyPy, both -on PyPy2.7 and PyPy3.5 beta. +Cython 0.27.1 (released very recently) supports more projects with PyPy, both +on PyPy2.7 and PyPy3.5 beta. Note version **0.27.1** is now the minimum +version that supports this version of PyPy, due to some interactions with +updated C-API interface code. We optimized the JSON parser for recurring string keys, which should decrease memory use to 50% and increase parsing speed by up to 15% for large JSON files with many repeating dictionary keys (which is quite common). -CFFI_, which is part of the PyPy release, has been updated to 1.11, +CFFI_, which is part of the PyPy release, has been updated to 1.11.1, improving an already great package for interfacing with C. CFFI now supports complex arguments in API mode, as well as ``char16_t`` and ``char32_t`` and has improved support for callbacks. 
@@ -145,6 +151,7 @@ * Issue 2590_: fix the bounds in the GC when allocating a lot of objects with finalizers * Replace magical NOT RPYTHON comment with a decorator * Implement ``socket.sendmsg()``/``.recvmsg()`` for py3.5 + * Add ``memory_pressure`` for ``_SSLSocket`` objects * Degredations @@ -163,7 +170,8 @@ * Add support for ``_PyNamespace_New``, ``PyMemoryView_FromMemory``, ``Py_EnterRecursiveCall`` raising RecursionError, ``PyObject_LengthHint``, - ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject`` + ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject``, + ``PyGenObject``, ``PyUnicode_Substring``, ``PyLong_FromUnicodeObject`` * Implement ``PyType_FromSpec`` (PEP 384) and fix issues with PEP 489 support * Support the new version of ``os.stat()`` on win32 * Use ``stat3()`` on Posix diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,4 +3,29 @@ =========================== .. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:899e5245de1e +.. startrev:d56dadcef996 + +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/whatsnew-pypy2-5.9.0.rst b/pypy/doc/whatsnew-pypy2-5.9.0.rst --- a/pypy/doc/whatsnew-pypy2-5.9.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.9.0.rst @@ -85,3 +85,12 @@ .. branch: py_ssize_t Explicitly use Py_ssize_t as the Signed type in pypy c-api + +.. branch: cpyext-jit + +Differentiate the code to call METH_NOARGS, METH_O and METH_VARARGS in cpyext: +this allows to write specialized code which is much faster than previous +completely generic version. Moreover, let the JIT to look inside the cpyext +module: the net result is that cpyext calls are up to 7x faster. However, this +is true only for very simple situations: in all real life code, we are still +much slower than CPython (more optimizations to come) diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. 
For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -3,7 +3,7 @@ from rpython.rlib import rdynload, clibffi from rpython.rtyper.lltypesystem import rffi -VERSION = "1.11.1" +VERSION = "1.11.2" FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI try: diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py --- a/pypy/module/_cffi_backend/cffi1_module.py +++ b/pypy/module/_cffi_backend/cffi1_module.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib import jit from pypy.interpreter.error import oefmt from pypy.interpreter.module import Module @@ -15,7 +16,7 @@ INITFUNCPTR = lltype.Ptr(lltype.FuncType([rffi.VOIDPP], lltype.Void)) - + at jit.dont_look_inside def load_cffi1_module(space, name, path, initptr): # This is called from pypy.module.cpyext.api.load_extension_module() from pypy.module._cffi_backend.call_python import get_ll_cffi_call_python diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -156,10 +156,11 @@ class W_CTypePtrBase(W_CTypePtrOrArray): # base class for both pointers and pointers-to-functions - _attrs_ = ['is_void_ptr', 'is_voidchar_ptr'] - _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr'] + _attrs_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] + _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] is_void_ptr = False is_voidchar_ptr = False + is_onebyte_ptr = False def convert_to_object(self, cdata): ptrdata = rffi.cast(rffi.CCHARPP, cdata)[0] @@ -179,12 +180,20 @@ if self.is_void_ptr or other.is_void_ptr: pass # cast from or to 'void *' elif self.is_voidchar_ptr or other.is_voidchar_ptr: - space = self.space - msg = ("implicit cast from '%s' to '%s' " - "will be forbidden in the future (check that the types " - "are as you expect; use an explicit ffi.cast() if they " - "are correct)" % (other.name, self.name)) - space.warn(space.newtext(msg), space.w_UserWarning) + # for backward compatibility, accept "char *" as either + # source of target. 
This is not what C does, though, + # so emit a warning that will eventually turn into an + # error. The warning is turned off if both types are + # pointers to single bytes. + if self.is_onebyte_ptr and other.is_onebyte_ptr: + pass # no warning + else: + space = self.space + msg = ("implicit cast from '%s' to '%s' " + "will be forbidden in the future (check that the types " + "are as you expect; use an explicit ffi.cast() if they " + "are correct)" % (other.name, self.name)) + space.warn(space.newtext(msg), space.w_UserWarning) else: raise self._convert_error("compatible pointer", w_ob) @@ -214,6 +223,7 @@ self.is_void_ptr = isinstance(ctitem, ctypevoid.W_CTypeVoid) self.is_voidchar_ptr = (self.is_void_ptr or isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar)) + self.is_onebyte_ptr = (ctitem.size == 1) W_CTypePtrBase.__init__(self, space, size, extra, 2, ctitem) def newp(self, w_init, allocator): diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1,7 +1,7 @@ # ____________________________________________________________ import sys -assert __version__ == "1.11.1", ("This test_c.py file is for testing a version" +assert __version__ == "1.11.2", ("This test_c.py file is for testing a version" " of cffi that differs from the one that we" " get from 'import _cffi_backend'") if sys.version_info < (3,): @@ -2099,7 +2099,8 @@ if sys.platform.startswith("linux"): BWChar = new_primitive_type("wchar_t") assert sizeof(BWChar) == 4 - assert int(cast(BWChar, -1)) == -1 # signed, on linux + # wchar_t is often signed on Linux, but not always (e.g. on ARM) + assert int(cast(BWChar, -1)) in (-1, 4294967295) def test_char16(): BChar16 = new_primitive_type("char16_t") @@ -3903,9 +3904,11 @@ BCharP = new_pointer_type(new_primitive_type("char")) BIntP = new_pointer_type(new_primitive_type("int")) BVoidP = new_pointer_type(new_void_type()) + BUCharP = new_pointer_type(new_primitive_type("unsigned char")) z1 = cast(BCharP, 0) z2 = cast(BIntP, 0) z3 = cast(BVoidP, 0) + z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 @@ -3919,6 +3922,12 @@ assert len(w) == 2 newp(new_pointer_type(BIntP), z3) # fine assert len(w) == 2 + newp(new_pointer_type(BCharP), z4) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z1) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z3) # fine + assert len(w) == 2 # check that the warnings are associated with lines in this file assert w[1].lineno == w[0].lineno + 4 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,20 +66,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return 
encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def 
main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. + """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -1,28 +1,27 @@ from pypy.interpreter.mixedmodule import MixedModule class Module(MixedModule): - "This module provides runtime bindings to C++ code for which reflection\n\ - info has been generated. Current supported back-ends are Reflex and CINT.\n\ - See http://doc.pypy.org/en/latest/cppyy.html for full details." + "This module bridges the cppyy frontend with its backend, through PyPy.\n\ + See http://cppyy.readthedocs.io/en/latest for full details."
interpleveldefs = { - '_load_dictionary' : 'interp_cppyy.load_dictionary', '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', '_register_class' : 'interp_cppyy.register_class', '_get_nullptr' : 'interp_cppyy.get_nullptr', - 'CPPInstanceBase' : 'interp_cppyy.W_CPPInstance', + 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { '_init_pythonify' : 'pythonify._init_pythonify', - 'load_reflection_info' : 'pythonify.load_reflection_info', 'add_pythonization' : 'pythonify.add_pythonization', 'Template' : 'pythonify.CPPTemplate', } diff --git a/pypy/module/_cppyy/backend/create_cppyy_package.py b/pypy/module/_cppyy/backend/create_cppyy_package.py deleted file mode 100755 --- a/pypy/module/_cppyy/backend/create_cppyy_package.py +++ /dev/null @@ -1,649 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function - -import os, sys -import argparse, re, shutil, tarfile, urllib2 - - -DEBUG_TESTBUILD = False - -TARBALL_CACHE_DIR = 'releases' - -ROOT_KEEP = ['build', 'cmake', 'config', 'core', 'etc', 'interpreter', - 'io', 'LICENSE', 'net', 'Makefile', 'CMakeLists.txt', 'math', - 'main'] # main only needed in more recent root b/c of rootcling -ROOT_CORE_KEEP = ['CMakeLists.txt', 'base', 'clib', 'clingutils', 'cont', - 'dictgen', 'foundation', 'lzma', 'macosx', 'meta', - 'metacling', 'metautils', 'rootcling_stage1', 'textinput', - 'thread', 'unix', 'utils', 'winnt', 'zip'] -ROOT_IO_KEEP = ['CMakeLists.txt', 'io', 'rootpcm'] -ROOT_NET_KEEP = ['CMakeLists.txt', 'net'] -ROOT_MATH_KEEP = ['CMakeLists.txt', 'mathcore'] -ROOT_ETC_KEEP = ['Makefile.arch', 'class.rules', 'cmake', 'dictpch', - 'gdb-backtrace.sh', 'gitinfo.txt', 'helgrind-root.supp', - 'hostcert.conf', 'system.plugins-ios', - 'valgrind-root-python.supp', 'valgrind-root.supp', 'vmc'] - -ROOT_EXPLICIT_REMOVE = ['core/base/v7', 'math/mathcore/v7', 'io/io/v7'] - - -ERR_RELEASE_NOT_FOUND = 2 - - -# -## CLI arguments -# -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - - -# -## ROOT source pull and cleansing -# -def clean_directory(directory, keeplist, trim_cmake=True): - removed_entries = [] - for entry in os.listdir(directory): - if entry[0] == '.' 
or entry in keeplist: - continue - removed_entries.append(entry) - entry = os.path.join(directory, entry) - print('now removing', entry) - if os.path.isdir(entry): - shutil.rmtree(entry) - else: - os.remove(entry) - - if not trim_cmake: - return - - # now take the removed entries out of the CMakeLists.txt - if removed_entries: - inp = os.path.join(directory, 'CMakeLists.txt') - print('trimming', inp) - outp = inp+'.new' - new_cml = open(outp, 'w') - for line in open(inp).readlines(): - if ('add_subdirectory' in line) or\ - ('COMMAND' in line and 'copy' in line) or\ - ('ROOT_ADD_TEST_SUBDIRECTORY' in line) or\ - ('install(DIRECTORY' in line): - for sub in removed_entries: - if sub in line: - line = '#'+line - break - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - else: - print('reusing existing %s/CMakeLists.txt' % (directory,)) - - -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - -if not os.path.exists(TARBALL_CACHE_DIR): - os.mkdir(TARBALL_CACHE_DIR) - -if args.release: - # use provided release - fn = 'root_v%s.source.tar.gz' % args.release - addr = 'https://root.cern.ch/download/'+fn - if not os.path.exists(os.path.join(TARBALL_CACHE_DIR, fn)): - try: - print('retrieving', fn) - resp = urllib2.urlopen(addr, fn) - out = open(os.path.join(TARBALL_CACHE_DIR, fn), 'wb') - out.write(resp.read()) - out.close() - except urllib2.HTTPError: - print('release %s not found' % args.release) - sys.exit(ERR_RELEASE_NOT_FOUND) - else: - print('reusing', fn, 'from local directory') -else: - print('provide release ... getting latest release is not yet implemented ...') - sys.exit(1) - # get latest and set fn, args.release, etc. 
- -# construct version for package -args.version = '' -testnext = False -for c in args.release: - if testnext: - testnext = False - if c == '0': - continue - if c == '.': - testnext = True - args.version += c -args.version += '.0' - -fn = os.path.join(TARBALL_CACHE_DIR, fn) -pkgdir = os.path.join('root-'+args.release) -if not os.path.exists(pkgdir): - print('now extracting', args.release) - tf = tarfile.TarFile.gzopen(fn) - tf.extractall() - tf.close() -else: - print('reusing existing directory', pkgdir) - -# remove everything except for the listed set of libraries -os.chdir(pkgdir) - -clean_directory(os.path.curdir, ROOT_KEEP) -clean_directory('core', ROOT_CORE_KEEP) -clean_directory('etc', ROOT_ETC_KEEP, trim_cmake=False) -clean_directory('io', ROOT_IO_KEEP) -clean_directory('math', ROOT_MATH_KEEP) -clean_directory('net', ROOT_NET_KEEP) - - -# trim main (only need rootcling) -print('trimming main') -for entry in os.listdir('main/src'): - if entry != 'rootcling.cxx': - os.remove('main/src/'+entry) -inp = 'main/CMakeLists.txt' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if ('ROOT_EXECUTABLE' in line or\ - 'SET_TARGET_PROPERTIES' in line) and\ - not 'rootcling' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove afterimage and ftgl explicitly -print('trimming externals') -for cmf in ['AfterImage', 'FTGL']: - os.remove('cmake/modules/Find%s.cmake' % (cmf,)) -inp = 'cmake/modules/SearchInstalledSoftware.cmake' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Check for ftgl if needed' == line[0:28] or\ - '#---Check for AfterImage' == line[0:24]: - now_stripping = True - elif '#---Check' == line[0:9]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'cmake/modules/RootBuildOptions.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if 'ROOT_BUILD_OPTION(builtin_ftgl' in line or\ - 'ROOT_BUILD_OPTION(builtin_afterimage' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove testing and examples -print('trimming testing') -inp = 'CMakeLists.txt' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Configure Testing using CTest' == line[0:33] or\ - '#---hsimple.root' == line[0:16]: - now_stripping = True - elif '#---Packaging' == line[0:13] or\ - '#---version' == line[0:11]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -print('trimming RootCPack') -inp = 'cmake/modules/RootCPack.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if 'README.txt' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -# some more explicit removes: -for dir_to_remove in ROOT_EXPLICIT_REMOVE: - try: - shutil.rmtree(dir_to_remove) - except OSError: - pass - -# special fixes -inp = 'core/base/src/TVirtualPad.cxx' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if '#include "X3DBuffer.h"' == line[0:22]: - line = """//#include "X3DBuffer.h" -typedef struct _x3d_sizeof_ { - int numPoints; - int numSegs; - int numPolys; -} Size3D; -""" - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'math/mathcore/src/Fitter.cxx' -if os.path.exists(inp): - outp = inp+'.new' - 
new_cml = open(outp, 'w') - for line in open(inp): - if '#include "TF1.h"' in line: - continue - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - -# done -os.chdir(os.path.pardir) - -# debugging: run a test build -if DEBUG_TESTBUILD: - print('running a debug test build') - tb = "test_builddir" - if os.path.exists(tb): - shutil.rmtree(tb) - os.mkdir(tb) - os.chdir(tb) - os.system('cmake ../%s -DCMAKE_INSTALL_PREFIX=../install -Dminimal=ON -Dasimage=OFF' % pkgdir) - os.system('make -j 32') - - -# -## package creation -# -countdown = 0 -pidir = 'Package-'+args.release -print('creating package', pidir) -if not os.path.exists(pidir): - os.mkdir(pidir) -os.chdir(pidir); countdown += 1 - -print('creating LICENSE.txt') -with open('LICENSE.txt', 'w') as outp: - outp.write("""There are three main parts: - - LLVM: distributed under University of Illinois/NCSA Open Source License - https://opensource.org/licenses/UoI-NCSA.php - ROOT: distributed under LGPL 2.1 - https://root.cern.ch/license - Cppyy: distributed under LBNL BSD - https://fedoraproject.org/wiki/Licensing/LBNLBSD -""") - -print('creating MANIFEST.in') -with open('MANIFEST.in', 'w') as outp: - outp.write("""# Include the license file -include LICENSE.txt - -# Include the data files -recursive-include src * -""") - -print('creating README.rst') -with open('README.rst', 'w') as outp: - outp.write("""PyPy cling-support -================== - ----- - -Find the documentation here: - http://doc.pypy.org/en/latest/cppyy.html -""") - -print('creating setup.cfg') -with open('setup.cfg', 'w') as outp: - outp.write("""[bdist_wheel] -universal=0 -""") - -print('creating setup.py') -with open('setup.py', 'w') as outp: - outp.write("""import os, sys, subprocess -from setuptools import setup, find_packages -from distutils import log -from distutils.command.build import build as _build -from setuptools.command.install import install as _install -from distutils.sysconfig import get_python_lib -from distutils.errors import DistutilsSetupError -from codecs import open - -here = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - -builddir = None -def get_builddir(): - global builddir - if builddir is None: - topdir = os.getcwd() - builddir = os.path.join(topdir, 'builddir') - return builddir - -srcdir = None -def get_srcdir(): - global srcdir - if srcdir is None: - topdir = os.getcwd() - srcdir = os.path.join(topdir, 'src', 'backend') - return srcdir - -class my_cmake_build(_build): - def __init__(self, dist, *args, **kwargs): - _build.__init__(self, dist, *args, **kwargs) - # TODO: can't seem to find a better way of getting hold of - # the install_lib parameter during the build phase ... - prefix = '' - try: - prefix = dist.get_command_obj('install').install_lib - except AttributeError: - pass - if not prefix: - prefix = get_python_lib(1, 0) - self.prefix = os.path.join(prefix, 'cppyy_backend') - - def run(self): - # base run - _build.run(self) - - # custom run - log.info('Now building libcppyy_backend.so and dependencies') - builddir = get_builddir() - srcdir = get_srcdir() - if not os.path.exists(builddir): - log.info('Creating build directory %s ...' 
% builddir) - os.makedirs(builddir) - - os.chdir(builddir) - log.info('Running cmake for cppyy_backend') - if subprocess.call([ - 'cmake', srcdir, '-Dminimal=ON -Dasimage=OFF', - '-DCMAKE_INSTALL_PREFIX='+self.prefix]) != 0: - raise DistutilsSetupError('Failed to configure cppyy_backend') - - nprocs = os.getenv("MAKE_NPROCS") - if nprocs: - try: - ival = int(nprocs) - nprocs = '-j'+nprocs - except ValueError: - log.warn("Integer expected for MAKE_NPROCS, but got %s (ignored)", nprocs) - nprocs = '-j1' - else: - nprocs = '-j1' - log.info('Now building cppyy_backend and dependencies ...') - if subprocess.call(['make', nprocs]) != 0: - raise DistutilsSetupError('Failed to build cppyy_backend') - - log.info('build finished') - -class my_libs_install(_install): - def run(self): - # base install - _install.run(self) - - # custom install - log.info('Now installing libcppyy_backend.so and dependencies') - builddir = get_builddir() - if not os.path.exists(builddir): - raise DistutilsSetupError('Failed to find build dir!') - os.chdir(builddir) - - prefix = self.install_lib - log.info('Now installing in %s ...', prefix) - if subprocess.call(['make', 'install']) != 0: - raise DistutilsSetupError('Failed to install cppyy_backend') - - log.info('install finished') - - def get_outputs(self): - outputs = _install.get_outputs(self) - outputs.append(os.path.join(self.install_lib, 'cppyy_backend')) - return outputs - -setup( - name='PyPy-cppyy-backend', -""") - outp.write(" version='%s', # corresponds to ROOT %s, extra number is for packager\n"\ - % (args.version, args.release)) - outp.write(""" description='Cling support for PyPy', - long_description=long_description, - - url='http://pypy.org', - - # Author details - author='PyPy Developers', - author_email='pypy-dev at python.org', - - license='LLVM: UoI-NCSA; ROOT: LGPL 2.1; Cppyy: LBNL BSD', - - classifiers=[ - 'Development Status :: 4 - Beta', - - 'Intended Audience :: Developers', - - 'Topic :: Software Development', - 'Topic :: Software Development :: Interpreters', - - #'License :: OSI Approved :: MIT License', - - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Programming Language :: C', - 'Programming Language :: C++', - - 'Natural Language :: English' - ], - - keywords='interpreter development', - - packages=find_packages('src', ['backend']), - include_package_data=True, - - extras_require={ - }, - - cmdclass = { - 'build': my_cmake_build, - 'install': my_libs_install, - }, -) -""") - - -print('creating src ... ROOT part') -if not os.path.exists('src'): - os.mkdir('src') -os.chdir('src'); countdown += 1 -if not os.path.exists('backend'): - src = os.path.join(os.path.pardir, os.path.pardir, pkgdir) - print('now copying', src) - shutil.copytree(src, 'backend') - -print('creating src ... 
cppyy part') -os.chdir('backend'); countdown += 1 -if not os.path.exists('cppyy'): - os.mkdir('cppyy') - os.chdir('cppyy'); countdown += 1 - - with open('CMakeLists.txt', 'w') as outp: - outp.write("""############################################################################ -# CMakeLists.txt file for building cppyy package -############################################################################ - From pypy.commits at gmail.com Thu Nov 30 14:41:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 11:41:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Module names inside a zip are not fsencoded - they can be any str Message-ID: <5a205ee5.31a9df0a.42891.7e34@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93226:f1a556ffff93 Date: 2017-11-30 19:39 +0000 http://bitbucket.org/pypy/pypy/changeset/f1a556ffff93/ Log: Module names inside a zip are not fsencoded - they can be any str diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -47,7 +47,7 @@ # THIS IS A TERRIBLE HACK TO BE CPYTHON COMPATIBLE def getitem(self, space, w_name): - return self._getitem(space, space.fsencode_w(w_name)) + return self._getitem(space, space.text_w(w_name)) def _getitem(self, space, name): try: @@ -90,14 +90,14 @@ def iteritems(self, space): return space.iter(self.items(space)) - @unwrap_spec(name='fsencode') + @unwrap_spec(name='text') def contains(self, space, name): return space.newbool(name in self.cache) def clear(self, space): self.cache = {} - @unwrap_spec(name='fsencode') + @unwrap_spec(name='text') def delitem(self, space, name): del self.cache[name] @@ -221,7 +221,7 @@ except KeyError: return False - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def find_module(self, space, fullname, w_path=None): filename = self.make_filename(fullname) for _, _, ext in ENUMERATE_EXTS: @@ -247,7 +247,7 @@ return self.filename + os.path.sep + filename def load_module(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, is_package, ext in ENUMERATE_EXTS: fname = filename + ext @@ -287,7 +287,7 @@ raise raise oefmt(get_error(space), "can't find module %R", w_fullname) - @unwrap_spec(filename='fsencode') + @unwrap_spec(filename='text') def get_data(self, space, filename): filename = self._find_relative_path(filename) try: @@ -301,7 +301,7 @@ raise zlib_error(space, e.msg) def get_code(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, _, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -325,7 +325,7 @@ "Cannot find source or code for %R in %R", w_fullname, space.newfilename(self.name)) - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def get_source(self, space, fullname): filename = self.make_filename(fullname) found = False @@ -348,7 +348,7 @@ space.newfilename(self.name)) def get_filename(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -360,7 +360,7 @@ space.newfilename(self.name)) def is_package(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = 
space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -385,7 +385,7 @@ return True, self.filename + os.path.sep + self.corr_zname(dirpath) return False, None - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def find_loader(self, space, fullname, w_path=None): found, ns_portion = self._find_loader(space, fullname) if not found: @@ -401,9 +401,9 @@ name = space.fsencode_w(w_name) ok = False parts_ends = [i for i in range(0, len(name)) - if name[i] == os.path.sep or name[i] == ZIPSEP] + if name[i] == os.path.sep or name[i] == ZIPSEP] parts_ends.append(len(name)) - filename = "" # make annotator happy + filename = "" # make annotator happy for i in parts_ends: filename = name[:i] if not filename: From pypy.commits at gmail.com Thu Nov 30 15:43:39 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 12:43:39 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: I think this is a speed-up Message-ID: <5a206d7b.e1acdf0a.492d7.9357@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93227:91d2d71881e2 Date: 2017-11-30 21:43 +0100 http://bitbucket.org/pypy/pypy/changeset/91d2d71881e2/ Log: I think this is a speed-up diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -86,8 +86,8 @@ """Gives the position of the next codepoint after pos. Assumes valid utf8. 'pos' must be before the end of the string. """ + assert pos >= 0 chr1 = ord(code[pos]) - assert pos >= 0 if chr1 <= 0x7F: return pos + 1 if chr1 <= 0xDF: From pypy.commits at gmail.com Thu Nov 30 21:08:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:08:39 -0800 (PST) Subject: [pypy-commit] pypy py3.5: .pyo suffix is meaningless now (PEP 488) Message-ID: <5a20b9a7.c9061c0a.97b5f.914f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93228:f8e7ad765a37 Date: 2017-12-01 02:06 +0000 http://bitbucket.org/pypy/pypy/changeset/f8e7ad765a37/ Log: .pyo suffix is meaningless now (PEP 488) diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -729,7 +729,7 @@ SourceFileLoader, SourcelessFileLoader) if IS_WINDOWS: filename = filename.lower() - if filename.endswith('.pyc') or filename.endswith('.pyo'): + if filename.endswith('.pyc'): # We don't actually load via SourcelessFileLoader # because '__main__' must not be listed inside # 'importlib._bootstrap._module_locks' (it deadlocks diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -254,7 +254,7 @@ assert typ == imp.PY_SOURCE source = file.read() file.close() - if fn.endswith('.pyc') or fn.endswith('.pyo'): + if fn.endswith('.pyc'): fn = fn[:-1] app = gateway.applevel(source, filename=fn, modname=appname) applevelcache[impbase] = app diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py --- a/pypy/module/imp/interp_imp.py +++ b/pypy/module/imp/interp_imp.py @@ -27,7 +27,7 @@ def get_tag(space): """get_tag() -> string - Return the magic tag for .pyc or .pyo files.""" + Return the magic tag for .pyc files.""" return space.newtext(importing.PYC_TAG) def get_file(space, w_file, filename, filemode): diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ 
b/pypy/module/imp/test/test_app.py @@ -85,7 +85,7 @@ assert suffix == '.py' assert mode == 'r' elif type == imp.PY_COMPILED: - assert suffix in ('.pyc', '.pyo') + assert suffix == '.pyc' assert mode == 'rb' elif type == imp.C_EXTENSION: assert suffix.endswith(('.pyd', '.so')) diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -18,10 +18,8 @@ ENUMERATE_EXTS = unrolling_iterable( [(True, True, ZIPSEP + '__init__.pyc'), - (True, True, ZIPSEP + '__init__.pyo'), (False, True, ZIPSEP + '__init__.py'), (True, False, '.pyc'), - (True, False, '.pyo'), (False, False, '.py')]) class Cache: diff --git a/pypy/sandbox/pypy_interact.py b/pypy/sandbox/pypy_interact.py --- a/pypy/sandbox/pypy_interact.py +++ b/pypy/sandbox/pypy_interact.py @@ -46,7 +46,7 @@ # * can access its own executable # * can access the pure Python libraries # * can access the temporary usession directory as /tmp - exclude = ['.pyc', '.pyo'] + exclude = ['.pyc'] if self.tmpdir is None: tmpdirnode = Dir({}) else: @@ -57,7 +57,7 @@ 'bin': Dir({ 'pypy3-c': RealFile(self.executable, mode=0111), 'lib-python': RealDir(os.path.join(libroot, 'lib-python'), - exclude=exclude), + exclude=exclude), 'lib_pypy': RealDir(os.path.join(libroot, 'lib_pypy'), exclude=exclude), }), @@ -66,7 +66,7 @@ def main(): from getopt import getopt # and not gnu_getopt! - options, arguments = getopt(sys.argv[1:], 't:hv', + options, arguments = getopt(sys.argv[1:], 't:hv', ['tmp=', 'heapsize=', 'timeout=', 'log=', 'verbose', 'help']) tmpdir = None From pypy.commits at gmail.com Thu Nov 30 21:27:49 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:27:49 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Module names inside a zip are not fsencoded, part 2 Message-ID: <5a20be25.02be1c0a.c0baf.304f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93229:8309e6092c02 Date: 2017-12-01 02:25 +0000 http://bitbucket.org/pypy/pypy/changeset/8309e6092c02/ Log: Module names inside a zip are not fsencoded, part 2 diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -1,3 +1,5 @@ +import os +import stat from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt @@ -9,8 +11,6 @@ from rpython.rlib.unroll import unrolling_iterable from rpython.rlib.rzipfile import RZipFile, BadZipfile from rpython.rlib.rzlib import RZlibError -import os -import stat ZIPSEP = '/' # note that zipfiles always use slash, but for OSes with other @@ -145,7 +145,7 @@ return fname def import_py_file(self, space, modname, filename, buf, pkgpath): - w_mod = Module(space, space.newfilename(modname)) + w_mod = Module(space, space.newtext(modname)) real_name = self.filename + os.path.sep + self.corr_zname(filename) space.setattr(w_mod, space.newtext('__loader__'), self) importing._prepare_module(space, w_mod, real_name, pkgpath) From pypy.commits at gmail.com Thu Nov 30 21:27:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:27:51 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a20be27.11c6df0a.4c8c1.437c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93230:c932756506d4 Date: 2017-12-01 02:26 +0000 http://bitbucket.org/pypy/pypy/changeset/c932756506d4/ Log: hg merge default diff --git 
a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -273,7 +273,8 @@ return r def contains(self, val): - assert not isinstance(val, long) + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT):
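
A minimal sketch of the we_are_translated() idiom used in the hunk above, assuming a Python 2 environment with the rpython package importable; the helper name and the bounds logic below are made up for illustration and are not taken from intutils.py. we_are_translated(), from rpython.rlib.objectmodel, returns False while the code runs untranslated on the CPython 2 host and is constant-folded to True by the translator, so host-only sanity checks such as the isinstance(val, long) assert are dropped from the translated interpreter.

    from rpython.rlib.objectmodel import we_are_translated

    def bounded_contains(lower, upper, val):
        # Hypothetical helper mirroring the pattern in IntBound.contains():
        # the isinstance(val, long) check only makes sense on the untranslated
        # CPython 2 host, where 'long' exists as a separate type; after
        # translation we_are_translated() folds to True and the branch is
        # removed entirely.
        if not we_are_translated():
            assert not isinstance(val, long)
        return lower <= val <= upper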