From pypy.commits at gmail.com  Wed Nov  1 05:52:52 2017
From: pypy.commits at gmail.com (antocuni)
Date: Wed, 01 Nov 2017 02:52:52 -0700 (PDT)
Subject: [pypy-commit] pypy keep-debug-symbols: close branch to be merged
Message-ID: <59f99974.4e9d1c0a.3489.17c7@mx.google.com>

Author: Antonio Cuni
Branch: keep-debug-symbols
Changeset: r92894:f29f0f12ffa8
Date: 2017-11-01 10:45 +0100
http://bitbucket.org/pypy/pypy/changeset/f29f0f12ffa8/

Log: close branch to be merged

From pypy.commits at gmail.com  Wed Nov  1 05:52:54 2017
From: pypy.commits at gmail.com (antocuni)
Date: Wed, 01 Nov 2017 02:52:54 -0700 (PDT)
Subject: [pypy-commit] pypy default: merge the branch keep-debug-symbols:
Message-ID: <59f99976.87271c0a.9cae7.0b5d@mx.google.com>

Author: Antonio Cuni
Branch:
Changeset: r92895:77fff565d382
Date: 2017-11-01 10:51 +0100
http://bitbucket.org/pypy/pypy/changeset/77fff565d382/

Log: merge the branch keep-debug-symbols:

     - symbols are stripped from the executable and placed in a separate
       file, libpypy-c.so.debug

     - we add a .gnu_debuglink section to libpypy-c.so which points to the
       .debug file, so that it works transparently in gdb

     - this produces immensely more useful stack traces inside gdb;
       moreover, it is also potentially usable by vmprof

     - the .debug file is ~18MB. The tarball size goes from 22MB to 25MB.
       I claim that disk space and bandwidth are cheap, so we should just
       not care, especially for nightly builds

     - if we REALLY care about the tarball size of official releases, we
       can simply remove the .debug file from the tarball

diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py
--- a/pypy/tool/release/package.py
+++ b/pypy/tool/release/package.py
@@ -21,6 +21,7 @@
 import fnmatch
 import subprocess
 import glob
+from pypy.tool.release.smartstrip import smartstrip
 
 if sys.version_info < (2,6):
     py.test.skip("requires 2.6 so far")
@@ -212,15 +213,8 @@
     old_dir = os.getcwd()
     try:
         os.chdir(str(builddir))
-        if not options.nostrip:
-            for source, target in binaries:
-                if sys.platform == 'win32':
-                    pass
-                elif sys.platform == 'darwin':
-                    # 'strip' fun: see issue #587 for why -x
-                    os.system("strip -x " + str(bindir.join(target)))    # ignore errors
-                else:
-                    os.system("strip " + str(bindir.join(target)))    # ignore errors
+        for source, target in binaries:
+            smartstrip(bindir.join(target), keep_debug=options.keep_debug)
         #
         if USE_ZIPFILE_MODULE:
             import zipfile
@@ -281,8 +275,8 @@
                         help='do not build and package the %r cffi module' % (key,))
     parser.add_argument('--without-cffi', dest='no_cffi', action='store_true',
                         help='skip building *all* the cffi modules listed above')
-    parser.add_argument('--nostrip', dest='nostrip', action='store_true',
-                        help='do not strip the exe, making it ~10MB larger')
+    parser.add_argument('--no-keep-debug', dest='keep_debug',
                         action='store_false', help='do not keep debug symbols')
     parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe,
                         help='target executable name, defaults to "pypy"')
     parser.add_argument('--archive-name', dest='name', type=str, default='',
@@ -295,8 +289,8 @@
                         help='use as pypy exe instead of pypy/goal/pypy-c')
     options = parser.parse_args(args)
 
-    if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"):
-        options.nostrip = True
+    if os.environ.has_key("PYPY_PACKAGE_NOKEEPDEBUG"):
+        options.keep_debug = False
     if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"):
         options.no_tk = True
     if not options.builddir:
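For reference, the mechanism described in the log message above boils down
to two objcopy invocations plus a strip. The sketch below shows the same
idea in plain Python; it is an illustration only, assuming a Linux host
with GNU binutils, and "libpypy-c.so" is a placeholder path rather than
anything taken from the patch. The new smartstrip.py further down performs
the equivalent steps via os.system:

    # minimal sketch of the split-debug flow (assumes GNU binutils on Linux;
    # "libpypy-c.so" is a placeholder path, not part of the patch itself)
    import subprocess

    def split_debug(libpath):
        debugpath = libpath + '.debug'
        # copy only the debug info into a separate .debug file
        subprocess.check_call(['objcopy', '--only-keep-debug', libpath, debugpath])
        # drop the symbols from the library itself
        subprocess.check_call(['strip', libpath])
        # record the .debug file in a .gnu_debuglink section, so gdb finds it
        subprocess.check_call(['objcopy', '--add-gnu-debuglink=%s' % debugpath, libpath])

    split_debug('libpypy-c.so')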
diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/release/smartstrip.py
@@ -0,0 +1,32 @@
+"""
+Strip symbols from an executable, but keep them in a .debug file
+"""
+
+import sys
+import os
+import py
+
+def _strip(exe):
+    if sys.platform == 'win32':
+        pass
+    elif sys.platform == 'darwin':
+        # 'strip' fun: see issue #587 for why -x
+        os.system("strip -x " + str(exe))    # ignore errors
+    else:
+        os.system("strip " + str(exe))    # ignore errors
+
+def _extract_debug_symbols(exe, debug):
+    if sys.platform == 'linux2':
+        os.system("objcopy --only-keep-debug %s %s" % (exe, debug))
+        os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe))
+
+def smartstrip(exe, keep_debug=True):
+    exe = py.path.local(exe)
+    debug = py.path.local(str(exe) + '.debug')
+    if keep_debug:
+        _extract_debug_symbols(exe, debug)
+    _strip(exe)
+
+
+if __name__ == '__main__':
+    smartstrip(sys.argv[1])
diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/release/test/test_smartstrip.py
@@ -0,0 +1,50 @@
+import pytest
+import sys
+import os
+from commands import getoutput
+from pypy.tool.release.smartstrip import smartstrip
+
+@pytest.fixture
+def exe(tmpdir):
+    src = tmpdir.join("myprog.c")
+    src.write("""
+    int foo(int a, int b) {
+        return a+b;
+    }
+    int main(void) { }
+    """)
+    exe = tmpdir.join("myprog")
+    ret = os.system("gcc -o %s %s" % (exe, src))
+    assert ret == 0
+    return exe
+
+def info_symbol(exe, symbol):
+    out = getoutput("gdb %s -ex 'info symbol %s' -ex 'quit'" % (exe, symbol))
+    lines = out.splitlines()
+    return lines[-1]
+
+@pytest.mark.skipif(sys.platform == 'win32',
+                    reason='strip not supported on windows')
+class TestSmarStrip(object):
+
+    def test_info_symbol(self, exe):
+        info = info_symbol(exe, "foo")
+        assert info == "foo in section .text"
+
+    def test_strip(self, exe):
+        smartstrip(exe, keep_debug=False)
+        info = info_symbol(exe, "foo")
+        assert info.startswith("No symbol table is loaded")
+
+    @pytest.mark.skipif(sys.platform != 'linux2',
+                        reason='keep_debug not supported')
+    def test_keep_debug(self, exe, tmpdir):
+        smartstrip(exe, keep_debug=True)
+        debug = tmpdir.join("myprog.debug")
+        assert debug.check(file=True)
+        info = info_symbol(exe, "foo")
+        assert info == "foo in section .text of %s" % exe
+        #
+        debug.remove()
+        info = info_symbol(exe, "foo")
+        assert info.startswith("No symbol table is loaded")

From pypy.commits at gmail.com  Wed Nov  1 15:30:42 2017
From: pypy.commits at gmail.com (mattip)
Date: Wed, 01 Nov 2017 12:30:42 -0700 (PDT)
Subject: [pypy-commit] pypy bsd-patches: patches from issue 2694, implement ctypes.CDLL(... handle=n)
Message-ID: <59fa20e2.95b6df0a.e17b6.799e@mx.google.com>

Author: Matti Picus
Branch: bsd-patches
Changeset: r92896:86e686981d73
Date: 2017-11-01 21:13 +0200
http://bitbucket.org/pypy/pypy/changeset/86e686981d73/

Log: patches from issue 2694, implement ctypes.CDLL(...
handle=n) diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: From pypy.commits at gmail.com Wed Nov 1 15:30:44 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:44 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: 
patch from issue2695, fail early in ll2ctypes RTLD code Message-ID: <59fa20e4.48d31c0a.39652.cc49@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92897:16db4e36eac0 Date: 2017-11-01 21:14 +0200 http://bitbucket.org/pypy/pypy/changeset/16db4e36eac0/ Log: patch from issue2695, fail early in ll2ctypes RTLD code diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Wed Nov 1 15:30:46 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:46 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: patches from issue #2696, fix various tests on FreeBSD Message-ID: <59fa20e6.4e9d1c0a.3489.5242@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92898:e9096f3b8ca5 Date: 2017-11-01 21:21 +0200 http://bitbucket.org/pypy/pypy/changeset/e9096f3b8ca5/ Log: patches from issue #2696, fix various tests on FreeBSD diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -107,6 +108,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + 
"'freebsd' in sys.platform") def test_raw_input(): import os import pty From pypy.commits at gmail.com Wed Nov 1 15:30:48 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 01 Nov 2017 12:30:48 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: patches from issue #2697 fix compilation on FreeBSD Message-ID: <59fa20e8.46901c0a.c1e8e.7e26@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92899:0e6fa4b45bfa Date: 2017-11-01 21:29 +0200 http://bitbucket.org/pypy/pypy/changeset/0e6fa4b45bfa/ Log: patches from issue #2697 fix compilation on FreeBSD diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -244,13 +244,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( From pypy.commits at gmail.com Thu Nov 2 04:07:35 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:35 -0700 (PDT) Subject: [pypy-commit] pypy default: hack the cffi hack to import setuptools on a fresh pypy, gaaaa! Message-ID: <59fad247.9b88df0a.5accb.a6bf@mx.google.com> Author: Matti Picus Branch: Changeset: r92900:a2348b760a36 Date: 2017-11-02 09:52 +0200 http://bitbucket.org/pypy/pypy/changeset/a2348b760a36/ Log: hack the cffi hack to import setuptools on a fresh pypy, gaaaa! diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -22,6 +22,12 @@ shutil.rmtree(str(basedir.join('lib_pypy', '__pycache__')), ignore_errors=True) + # be sure pip, setuptools are installed in a fresh pypy + # allows proper functioning of cffi on win32 with newer vc compilers + # XXX move this to a build slave step? 
+ status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import setuptools']) + if status != 0: + status, stdout, stderr = run_subprocess(str(pypy_c), ['-m', 'ensurepip']) failures = [] for key, module in sorted(cffi_build_scripts.items()): if module is None or getattr(options, 'no_' + key, False): From pypy.commits at gmail.com Thu Nov 2 04:07:37 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:37 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: merge default into branch Message-ID: <59fad249.cc091c0a.9de42.e9fc@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92901:9112cc43a6cc Date: 2017-11-02 09:53 +0200 http://bitbucket.org/pypy/pypy/changeset/9112cc43a6cc/ Log: merge default into branch diff too long, truncating to 2000 out of 3454 lines diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from 
pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. -def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import 
INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void 
converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, 
python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's 
nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + 
raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis 
memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
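(For illustration only: a self-contained sketch of the scope-splitting logic added above. It follows the same algorithm as the new scope_splitter, under a hypothetical name, and the input string is just an example, not taken from the patch.)

    def split_scopes(name):
        # yield the '::'-separated pieces of a C++ name, ignoring any '::'
        # that occurs inside template arguments
        depth, scope = 0, ""
        for c in name:
            if c == ':' and not depth:
                if scope:
                    yield scope
                    scope = ""
                continue
            elif c == '<':
                depth += 1
            elif c == '>':
                depth -= 1
            scope += c
        yield scope

    assert list(split_scopes("std::vector<std::pair<int,double> >::iterator")) == \
           ["std", "vector<std::pair<int,double> >", "iterator"]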
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, - "__new__" : make_new(class_name), + "__dispatch__" : dispatch, } # insert (static) methods into the class dictionary - for name in decl.get_method_names(): - cppol = decl.get_overload(name) + for m_name in decl.get_method_names(): + cppol = decl.get_overload(m_name) if cppol.is_static(): - d_class[name] = make_static_function(name, cppol) + d_class[m_name] = make_static_function(m_name, cppol) else: - d_class[name] = make_method(name, cppol) + d_class[m_name] = make_method(m_name, cppol) # add all data members to the dictionary of the class to be created, and # static ones also to the metaclass (needed for property setters) - for name in decl.get_datamember_names(): - cppdm = decl.get_datamember(name) - d_class[name] = cppdm + for d_name in decl.get_datamember_names(): + cppdm = decl.get_datamember(d_name) + d_class[d_name] = cppdm if cppdm.is_static(): - d_meta[name] = cppdm + d_meta[d_name] = cppdm # create a metaclass to allow properties (for static data write access) metabases = [type(base) for base in bases] - metacpp = type(CPPMetaScope)(class_name+'_meta', _drop_cycles(metabases), d_meta) + metacpp = type(CPPMetaScope)(cl_name+'_meta', _drop_cycles(metabases), d_meta) # create the python-side C++ class - pycls = metacpp(class_name, _drop_cycles(bases), d_class) + pycls = metacpp(cl_name, _drop_cycles(bases), d_class) # store the class on its outer scope - setattr(scope, final_class_name, pycls) + setattr(scope, cl_name, pycls) # the call to register will add back-end specific pythonizations and thus # needs to run first, so that the generic 
pythonizations can use them @@ -192,32 +233,32 @@ return CPPTemplate(template_name, scope) -def get_pycppitem(scope, name): +def get_scoped_pycppitem(scope, name): import _cppyy - # resolve typedefs/aliases - full_name = (scope == gbl) and name or (scope.__name__+'::'+name) - true_name = _cppyy._resolve_name(full_name) - if true_name != full_name: - return get_pycppclass(true_name) + # resolve typedefs/aliases: these may cross namespaces, in which case + # the lookup must trigger the creation of all necessary scopes + scoped_name = (scope == gbl) and name or (scope.__cppname__+'::'+name) + final_scoped_name = _cppyy._resolve_name(scoped_name) + if final_scoped_name != scoped_name: + pycppitem = get_pycppitem(final_scoped_name) + # also store on the requested scope (effectively a typedef or pointer copy) + setattr(scope, name, pycppitem) + return pycppitem pycppitem = None - # classes - cppitem = _cppyy._scope_byname(true_name) + # scopes (classes and namespaces) + cppitem = _cppyy._scope_byname(final_scoped_name) if cppitem: - name = true_name - if scope != gbl: - name = true_name[len(scope.__cppname__)+2:] if cppitem.is_namespace(): pycppitem = make_cppnamespace(scope, name, cppitem) - setattr(scope, name, pycppitem) else: - pycppitem = make_cppclass(scope, name, true_name, cppitem) + pycppitem = make_cppclass(scope, name, cppitem) # templates if not cppitem: - cppitem = _cppyy._template_byname(true_name) + cppitem = _cppyy._is_template(final_scoped_name) if cppitem: pycppitem = make_cpptemplatetype(scope, name) setattr(scope, name, pycppitem) @@ -249,29 +290,6 @@ raise AttributeError("'%s' has no attribute '%s'" % (str(scope), name)) -def scope_splitter(name): - is_open_template, scope = 0, "" - for c in name: - if c == ':' and not is_open_template: - if scope: - yield scope - scope = "" - continue - elif c == '<': - is_open_template += 1 - elif c == '>': - is_open_template -= 1 - scope += c - yield scope - -def get_pycppclass(name): - # break up the name, to walk the scopes and get the class recursively - scope = gbl - for part in scope_splitter(name): - scope = getattr(scope, part) - return scope - - # pythonization by decoration (move to their own file?) 
def python_style_getitem(self, idx): # python-style indexing: check for size and allow indexing from the back @@ -346,8 +364,8 @@ # also the fallback on the indexed __getitem__, but that is slower) if not 'vector' in pyclass.__name__[:11] and \ ('begin' in pyclass.__dict__ and 'end' in pyclass.__dict__): - if _cppyy._scope_byname(pyclass.__name__+'::iterator') or \ - _cppyy._scope_byname(pyclass.__name__+'::const_iterator'): + if _cppyy._scope_byname(pyclass.__cppname__+'::iterator') or \ + _cppyy._scope_byname(pyclass.__cppname__+'::const_iterator'): def __iter__(self): i = self.begin() while i != self.end(): @@ -416,17 +434,21 @@ # pre-create std to allow direct importing gbl.std = make_cppnamespace(gbl, 'std', _cppyy._scope_byname('std')) + # add move cast + gbl.std.move = _cppyy.move + # install a type for enums to refer to # TODO: this is correct for C++98, not for C++11 and in general there will # be the same issue for all typedef'd builtin types setattr(gbl, 'internal_enum_type_t', int) - # install nullptr as a unique reference - setattr(gbl, 'nullptr', _cppyy._get_nullptr()) - # install for user access _cppyy.gbl = gbl + # install nullptr as a unique reference + _cppyy.nullptr = _cppyy._get_nullptr() + + # user-defined pythonizations interface _pythonizations = {} def add_pythonization(class_name, callback): diff --git a/pypy/module/_cppyy/src/dummy_backend.cxx b/pypy/module/_cppyy/src/dummy_backend.cxx --- a/pypy/module/_cppyy/src/dummy_backend.cxx +++ b/pypy/module/_cppyy/src/dummy_backend.cxx @@ -955,7 +955,13 @@ return cppstring_to_cstring(""); } -char* cppyy_method_signature(cppyy_scope_t /* handle */, cppyy_index_t /* method_index */) { +char* cppyy_method_signature( + cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { + return cppstring_to_cstring(""); +} + +char* cppyy_method_prototype( + cppyy_scope_t /* handle */, cppyy_index_t /* method_index */, int /* show_formalargs */) { return cppstring_to_cstring(""); } diff --git a/pypy/module/_cppyy/test/Makefile b/pypy/module/_cppyy/test/Makefile --- a/pypy/module/_cppyy/test/Makefile +++ b/pypy/module/_cppyy/test/Makefile @@ -1,12 +1,14 @@ dicts = advancedcppDict.so \ advancedcpp2Dict.so \ + cpp11featuresDict.so \ crossingDict.so \ datatypesDict.so \ example01Dict.so \ fragileDict.so \ operatorsDict.so \ overloadsDict.so \ - stltypesDict.so + stltypesDict.so \ + templatesDict.so all : $(dicts) diff --git a/pypy/module/_cppyy/test/advancedcpp.cxx b/pypy/module/_cppyy/test/advancedcpp.cxx --- a/pypy/module/_cppyy/test/advancedcpp.cxx +++ b/pypy/module/_cppyy/test/advancedcpp.cxx @@ -106,17 +106,6 @@ } -// more template testing -long my_templated_method_class::get_size() { return -1; } - -long my_templated_method_class::get_char_size() { return (long)sizeof(char); } -long my_templated_method_class::get_int_size() { return (long)sizeof(int); } -long my_templated_method_class::get_long_size() { return (long)sizeof(long); } -long my_templated_method_class::get_float_size() { return (long)sizeof(float); } -long my_templated_method_class::get_double_size() { return (long)sizeof(double); } -long my_templated_method_class::get_self_size() { return (long)sizeof(my_templated_method_class); } - - // overload order testing int overload_one_way::gime() const { return 1; } std::string overload_one_way::gime() { return "aap"; } diff --git a/pypy/module/_cppyy/test/advancedcpp.h b/pypy/module/_cppyy/test/advancedcpp.h --- a/pypy/module/_cppyy/test/advancedcpp.h +++ b/pypy/module/_cppyy/test/advancedcpp.h 
@@ -246,8 +246,6 @@ int m_i; }; -template class std::vector; - //=========================================================================== class some_convertible { // for math conversions testing @@ -275,6 +273,7 @@ extern double my_global_double; // a couple of globals for access testing extern double my_global_array[500]; extern double* my_global_ptr; +static const char my_global_string[] = "aap " " noot " " mies"; //=========================================================================== class some_class_with_data { // for life-line and identity testing @@ -387,37 +386,6 @@ template char my_templated_function(char); template double my_templated_function(double); -class my_templated_method_class { -public: - long get_size(); // to get around bug in genreflex - template long get_size(); - - long get_char_size(); - long get_int_size(); - long get_long_size(); - long get_float_size(); - long get_double_size(); - - long get_self_size(); - -private: - double m_data[3]; -}; - -template -inline long my_templated_method_class::get_size() { - return sizeof(B); -} - -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); -template long my_templated_method_class::get_size(); - -typedef my_templated_method_class my_typedef_t; -template long my_templated_method_class::get_size(); - //=========================================================================== class overload_one_way { // overload order testing diff --git a/pypy/module/_cppyy/test/advancedcpp.xml b/pypy/module/_cppyy/test/advancedcpp.xml --- a/pypy/module/_cppyy/test/advancedcpp.xml +++ b/pypy/module/_cppyy/test/advancedcpp.xml @@ -53,8 +53,6 @@ - - diff --git a/pypy/module/_cppyy/test/cpp11features.cxx b/pypy/module/_cppyy/test/cpp11features.cxx new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.cxx @@ -0,0 +1,18 @@ +#if __cplusplus >= 201103L + +#include "cpp11features.h" + + +// for std::shared_ptr<> testing +int TestSharedPtr::s_counter = 0; + +std::shared_ptr create_shared_ptr_instance() { + return std::shared_ptr(new TestSharedPtr); +} + + +// for move ctors etc. +int TestMoving1::s_move_counter = 0; +int TestMoving2::s_move_counter = 0; + +#endif // c++11 and later diff --git a/pypy/module/_cppyy/test/cpp11features.h b/pypy/module/_cppyy/test/cpp11features.h new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.h @@ -0,0 +1,45 @@ +#if __cplusplus >= 201103L + +#include + + +//=========================================================================== +class TestSharedPtr { // for std::shared_ptr<> testing +public: + static int s_counter; + +public: + TestSharedPtr() { ++s_counter; } + TestSharedPtr(const TestSharedPtr&) { ++s_counter; } + ~TestSharedPtr() { --s_counter; } +}; + +std::shared_ptr create_shared_ptr_instance(); + + +//=========================================================================== +class TestMoving1 { // for move ctors etc. 
+public: + static int s_move_counter; + +public: + TestMoving1() {} + TestMoving1(TestMoving1&&) { ++s_move_counter; } + TestMoving1(const TestMoving1&) {} + TestMoving1& operator=(TestMoving1&&) { ++s_move_counter; return *this; } + TestMoving1& operator=(TestMoving1&) { return *this; } +}; + +class TestMoving2 { // note opposite method order from TestMoving1 +public: + static int s_move_counter; + +public: + TestMoving2() {} + TestMoving2(const TestMoving2&) {} + TestMoving2(TestMoving2&& other) { ++s_move_counter; } + TestMoving2& operator=(TestMoving2&) { return *this; } + TestMoving2& operator=(TestMoving2&&) { ++s_move_counter; return *this; } +}; + +#endif // c++11 and later diff --git a/pypy/module/_cppyy/test/cpp11features.xml b/pypy/module/_cppyy/test/cpp11features.xml new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/cpp11features.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/pypy/module/_cppyy/test/fragile.h b/pypy/module/_cppyy/test/fragile.h --- a/pypy/module/_cppyy/test/fragile.h +++ b/pypy/module/_cppyy/test/fragile.h @@ -30,9 +30,11 @@ void overload(int, no_such_class* p = 0) {} }; +static const int dummy_location = 0xdead; + class E { public: - E() : m_pp_no_such(0), m_pp_a(0) {} + E() : m_pp_no_such((no_such_class**)&dummy_location), m_pp_a(0) {} virtual int check() { return (int)'E'; } void overload(no_such_class**) {} diff --git a/pypy/module/_cppyy/test/templates.cxx b/pypy/module/_cppyy/test/templates.cxx new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.cxx @@ -0,0 +1,12 @@ +#include "templates.h" + + +// template methods +long MyTemplatedMethodClass::get_size() { return -1; } + +long MyTemplatedMethodClass::get_char_size() { return (long)sizeof(char); } +long MyTemplatedMethodClass::get_int_size() { return (long)sizeof(int); } +long MyTemplatedMethodClass::get_long_size() { return (long)42; /* "lying" */ } +long MyTemplatedMethodClass::get_float_size() { return (long)sizeof(float); } +long MyTemplatedMethodClass::get_double_size() { return (long)sizeof(double); } +long MyTemplatedMethodClass::get_self_size() { return (long)sizeof(MyTemplatedMethodClass); } diff --git a/pypy/module/_cppyy/test/templates.h b/pypy/module/_cppyy/test/templates.h new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.h @@ -0,0 +1,35 @@ +//=========================================================================== +class MyTemplatedMethodClass { // template methods +public: + long get_size(); // to get around bug in genreflex + template long get_size(); + + long get_char_size(); + long get_int_size(); + long get_long_size(); + long get_float_size(); + long get_double_size(); + + long get_self_size(); + +private: + double m_data[3]; +}; + +template +inline long MyTemplatedMethodClass::get_size() { + return sizeof(B); +} + +// +typedef MyTemplatedMethodClass MyTMCTypedef_t; + +// explicit instantiation +template long MyTemplatedMethodClass::get_size(); +template long MyTemplatedMethodClass::get_size(); + +// "lying" specialization +template<> +inline long MyTemplatedMethodClass::get_size() { + return 42; +} diff --git a/pypy/module/_cppyy/test/templates.xml b/pypy/module/_cppyy/test/templates.xml new file mode 100644 --- /dev/null +++ b/pypy/module/_cppyy/test/templates.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/pypy/module/_cppyy/test/test_advancedcpp.py b/pypy/module/_cppyy/test/test_advancedcpp.py --- a/pypy/module/_cppyy/test/test_advancedcpp.py +++ b/pypy/module/_cppyy/test/test_advancedcpp.py @@ -28,9 +28,9 @@ def 
test01_default_arguments(self): """Test usage of default arguments""" - import _cppyy + import _cppyy as cppyy def test_defaulter(n, t): - defaulter = getattr(_cppyy.gbl, '%s_defaulter' % n) + defaulter = getattr(cppyy.gbl, '%s_defaulter' % n) d = defaulter() assert d.m_a == t(11) @@ -55,23 +55,23 @@ assert d.m_b == t(4) assert d.m_c == t(5) d.__destruct__() - test_defaulter('short', int) + test_defaulter('short', int) test_defaulter('ushort', int) - test_defaulter('int', int) - test_defaulter('uint', int) - test_defaulter('long', long) - test_defaulter('ulong', long) - test_defaulter('llong', long) + test_defaulter('int', int) + test_defaulter('uint', int) + test_defaulter('long', long) + test_defaulter('ulong', long) + test_defaulter('llong', long) test_defaulter('ullong', long) - test_defaulter('float', float) + test_defaulter('float', float) test_defaulter('double', float) def test02_simple_inheritance(self): """Test binding of a basic inheritance structure""" - import _cppyy - base_class = _cppyy.gbl.base_class - derived_class = _cppyy.gbl.derived_class + import _cppyy as cppyy + base_class = cppyy.gbl.base_class + derived_class = cppyy.gbl.derived_class assert issubclass(derived_class, base_class) assert not issubclass(base_class, derived_class) @@ -123,8 +123,8 @@ def test03_namespaces(self): """Test access to namespaces and inner classes""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl assert gbl.a_ns is gbl.a_ns assert gbl.a_ns.d_ns is gbl.a_ns.d_ns @@ -150,8 +150,8 @@ def test03a_namespace_lookup_on_update(self): """Test whether namespaces can be shared across dictionaries.""" - import _cppyy, ctypes - gbl = _cppyy.gbl + import _cppyy as cppyy, ctypes + gbl = cppyy.gbl lib2 = ctypes.CDLL("./advancedcpp2Dict.so", ctypes.RTLD_GLOBAL) @@ -179,8 +179,8 @@ def test04_template_types(self): """Test bindings of templated types""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl assert gbl.T1 is gbl.T1 assert gbl.T2 is gbl.T2 @@ -245,8 +245,8 @@ def test05_abstract_classes(self): """Test non-instatiatability of abstract classes""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl raises(TypeError, gbl.a_class) raises(TypeError, gbl.some_abstract_class) @@ -260,12 +260,12 @@ def test06_datamembers(self): """Test data member access when using virtual inheritence""" - import _cppyy - a_class = _cppyy.gbl.a_class - b_class = _cppyy.gbl.b_class - c_class_1 = _cppyy.gbl.c_class_1 - c_class_2 = _cppyy.gbl.c_class_2 - d_class = _cppyy.gbl.d_class + import _cppyy as cppyy + a_class = cppyy.gbl.a_class + b_class = cppyy.gbl.b_class + c_class_1 = cppyy.gbl.c_class_1 + c_class_2 = cppyy.gbl.c_class_2 + d_class = cppyy.gbl.d_class assert issubclass(b_class, a_class) assert issubclass(c_class_1, a_class) @@ -354,8 +354,8 @@ def test07_pass_by_reference(self): """Test reference passing when using virtual inheritance""" - import _cppyy - gbl = _cppyy.gbl + import _cppyy as cppyy + gbl = cppyy.gbl b_class = gbl.b_class c_class = gbl.c_class_2 d_class = gbl.d_class @@ -387,71 +387,75 @@ def test08_void_pointer_passing(self): """Test passing of variants of void pointer arguments""" - import _cppyy - pointer_pass = _cppyy.gbl.pointer_pass - some_concrete_class = _cppyy.gbl.some_concrete_class + import _cppyy as cppyy + pointer_pass = cppyy.gbl.pointer_pass + some_concrete_class = cppyy.gbl.some_concrete_class pp = pointer_pass() o = some_concrete_class() - assert _cppyy.addressof(o) == pp.gime_address_ptr(o) - assert 
_cppyy.addressof(o) == pp.gime_address_ptr_ptr(o) - assert _cppyy.addressof(o) == pp.gime_address_ptr_ref(o) + assert cppyy.addressof(o) == pp.gime_address_ptr(o) + assert cppyy.addressof(o) == pp.gime_address_ptr_ptr(o) + assert cppyy.addressof(o) == pp.gime_address_ptr_ref(o) import array - addressofo = array.array('l', [_cppyy.addressof(o)]) - assert addressofo.buffer_info()[0] == pp.gime_address_ptr_ptr(addressofo) + addressofo = array.array('l', [cppyy.addressof(o)]) + assert addressofo[0] == pp.gime_address_ptr_ptr(addressofo) assert 0 == pp.gime_address_ptr(0) - assert 0 == pp.gime_address_ptr(None) + raises(TypeError, pp.gime_address_ptr, None) - ptr = _cppyy.bind_object(0, some_concrete_class) - assert _cppyy.addressof(ptr) == 0 + ptr = cppyy.bind_object(0, some_concrete_class) + assert cppyy.addressof(ptr) == 0 pp.set_address_ptr_ref(ptr) - assert _cppyy.addressof(ptr) == 0x1234 + assert cppyy.addressof(ptr) == 0x1234 pp.set_address_ptr_ptr(ptr) - assert _cppyy.addressof(ptr) == 0x4321 + assert cppyy.addressof(ptr) == 0x4321 + + assert cppyy.addressof(cppyy.nullptr) == 0 + raises(TypeError, cppyy.addressof, None) + assert cppyy.addressof(0) == 0 def test09_opaque_pointer_passing(self): """Test passing around of opaque pointers""" - import _cppyy - some_concrete_class = _cppyy.gbl.some_concrete_class + import _cppyy as cppyy + some_concrete_class = cppyy.gbl.some_concrete_class o = some_concrete_class() # TODO: figure out the PyPy equivalent of CObject (may have to do this # through the C-API from C++) - #cobj = _cppyy.as_cobject(o) - addr = _cppyy.addressof(o) + #cobj = cppyy.as_cobject(o) + addr = cppyy.addressof(o) - #assert o == _cppyy.bind_object(cobj, some_concrete_class) - #assert o == _cppyy.bind_object(cobj, type(o)) - #assert o == _cppyy.bind_object(cobj, o.__class__) - #assert o == _cppyy.bind_object(cobj, "some_concrete_class") - assert _cppyy.addressof(o) == _cppyy.addressof(_cppyy.bind_object(addr, some_concrete_class)) - assert o == _cppyy.bind_object(addr, some_concrete_class) - assert o == _cppyy.bind_object(addr, type(o)) - assert o == _cppyy.bind_object(addr, o.__class__) - assert o == _cppyy.bind_object(addr, "some_concrete_class") - raises(TypeError, _cppyy.bind_object, addr, "does_not_exist") - raises(TypeError, _cppyy.bind_object, addr, 1) + #assert o == cppyy.bind_object(cobj, some_concrete_class) + #assert o == cppyy.bind_object(cobj, type(o)) + #assert o == cppyy.bind_object(cobj, o.__class__) + #assert o == cppyy.bind_object(cobj, "some_concrete_class") + assert cppyy.addressof(o) == cppyy.addressof(cppyy.bind_object(addr, some_concrete_class)) + assert o == cppyy.bind_object(addr, some_concrete_class) + assert o == cppyy.bind_object(addr, type(o)) + assert o == cppyy.bind_object(addr, o.__class__) + assert o == cppyy.bind_object(addr, "some_concrete_class") + raises(TypeError, cppyy.bind_object, addr, "does_not_exist") + raises(TypeError, cppyy.bind_object, addr, 1) def test10_object_identity(self): """Test object identity""" - import _cppyy - some_concrete_class = _cppyy.gbl.some_concrete_class - some_class_with_data = _cppyy.gbl.some_class_with_data + import _cppyy as cppyy + some_concrete_class = cppyy.gbl.some_concrete_class + some_class_with_data = cppyy.gbl.some_class_with_data o = some_concrete_class() - addr = _cppyy.addressof(o) + addr = cppyy.addressof(o) - o2 = _cppyy.bind_object(addr, some_concrete_class) + o2 = cppyy.bind_object(addr, some_concrete_class) assert o is o2 - o3 = _cppyy.bind_object(addr, some_class_with_data) + o3 = 
cppyy.bind_object(addr, some_class_with_data) assert not o is o3 d1 = some_class_with_data() @@ -472,13 +476,13 @@ def test11_multi_methods(self): """Test calling of methods from multiple inheritance""" - import _cppyy - multi = _cppyy.gbl.multi + import _cppyy as cppyy + multi = cppyy.gbl.multi - assert _cppyy.gbl.multi1 is multi.__bases__[0] - assert _cppyy.gbl.multi2 is multi.__bases__[1] From pypy.commits at gmail.com Thu Nov 2 04:07:39 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 01:07:39 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <59fad24b.95091c0a.13a9.6c3a@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r92902:6311c6a24eba Date: 2017-11-02 10:10 +0200 http://bitbucket.org/pypy/pypy/changeset/6311c6a24eba/ Log: merge default into py3.5 diff --git a/pypy/tool/build_cffi_imports.py b/pypy/tool/build_cffi_imports.py --- a/pypy/tool/build_cffi_imports.py +++ b/pypy/tool/build_cffi_imports.py @@ -145,6 +145,12 @@ shutil.rmtree(str(join(basedir,'lib_pypy','__pycache__')), ignore_errors=True) + # be sure pip, setuptools are installed in a fresh pypy + # allows proper functioning of cffi on win32 with newer vc compilers + # XXX move this to a build slave step? + status, stdout, stderr = run_subprocess(str(pypy_c), ['-c', 'import setuptools']) + if status != 0: + status, stdout, stderr = run_subprocess(str(pypy_c), ['-m', 'ensurepip']) failures = [] for key, module in sorted(cffi_build_scripts.items()): diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -20,6 +20,7 @@ import py import fnmatch import subprocess +from pypy.tool.release.smartstrip import smartstrip USE_ZIPFILE_MODULE = sys.platform == 'win32' @@ -223,15 +224,8 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - if not options.nostrip: - for source, target in binaries: - if sys.platform == 'win32': - pass - elif sys.platform == 'darwin': - # 'strip' fun: see issue #587 for why -x - os.system("strip -x " + str(bindir.join(target))) # ignore errors - else: - os.system("strip " + str(bindir.join(target))) # ignore errors + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile @@ -297,8 +291,8 @@ help='do not build and package the %r cffi module' % (key,)) parser.add_argument('--without-cffi', dest='no_cffi', action='store_true', help='skip building *all* the cffi modules listed above') - parser.add_argument('--nostrip', dest='nostrip', action='store_true', - help='do not strip the exe, making it ~10MB larger') + parser.add_argument('--no-keep-debug', dest='keep_debug', + action='store_false', help='do not keep debug symbols') parser.add_argument('--rename_pypy_c', dest='pypy_c', type=str, default=pypy_exe, help='target executable name, defaults to "%s"' % pypy_exe) parser.add_argument('--archive-name', dest='name', type=str, default='', @@ -317,8 +311,8 @@ '(default on OS X)') options = parser.parse_args(args) - if os.environ.has_key("PYPY_PACKAGE_NOSTRIP"): - options.nostrip = True + if os.environ.has_key("PYPY_PACKAGE_NOKEEPDEBUG"): + options.keep_debug = False if os.environ.has_key("PYPY_PACKAGE_WITHOUTTK"): options.no_tk = True if os.environ.has_key("PYPY_EMBED_DEPENDENCIES"): diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py new file mode 100644 --- /dev/null +++ b/pypy/tool/release/smartstrip.py @@ -0,0 +1,32 @@ +""" +Strip symbols from an 
executable, but keep them in a .debug file +""" + +import sys +import os +import py + +def _strip(exe): + if sys.platform == 'win32': + pass + elif sys.platform == 'darwin': + # 'strip' fun: see issue #587 for why -x + os.system("strip -x " + str(exe)) # ignore errors + else: + os.system("strip " + str(exe)) # ignore errors + +def _extract_debug_symbols(exe, debug): + if sys.platform == 'linux2': + os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) + os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + +def smartstrip(exe, keep_debug=True): + exe = py.path.local(exe) + debug = py.path.local(str(exe) + '.debug') + if keep_debug: + _extract_debug_symbols(exe, debug) + _strip(exe) + + +if __name__ == '__main__': + smartstrip(sys.argv[1]) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py new file mode 100644 --- /dev/null +++ b/pypy/tool/release/test/test_smartstrip.py @@ -0,0 +1,50 @@ +import pytest +import sys +import os +from commands import getoutput +from pypy.tool.release.smartstrip import smartstrip + + at pytest.fixture +def exe(tmpdir): + src = tmpdir.join("myprog.c") + src.write(""" + int foo(int a, int b) { + return a+b; + } + int main(void) { } + """) + exe = tmpdir.join("myprog") + ret = os.system("gcc -o %s %s" % (exe, src)) + assert ret == 0 + return exe + +def info_symbol(exe, symbol): + out = getoutput("gdb %s -ex 'info symbol %s' -ex 'quit'" % (exe, symbol)) + lines = out.splitlines() + return lines[-1] + + at pytest.mark.skipif(sys.platform == 'win32', + reason='strip not supported on windows') +class TestSmarStrip(object): + + def test_info_symbol(self, exe): + info = info_symbol(exe, "foo") + assert info == "foo in section .text" + + def test_strip(self, exe): + smartstrip(exe, keep_debug=False) + info = info_symbol(exe, "foo") + assert info.startswith("No symbol table is loaded") + + @pytest.mark.skipif(sys.platform != 'linux2', + reason='keep_debug not supported') + def test_keep_debug(self, exe, tmpdir): + smartstrip(exe, keep_debug=True) + debug = tmpdir.join("myprog.debug") + assert debug.check(file=True) + info = info_symbol(exe, "foo") + assert info == "foo in section .text of %s" % exe + # + debug.remove() + info = info_symbol(exe, "foo") + assert info.startswith("No symbol table is loaded") From pypy.commits at gmail.com Thu Nov 2 06:38:38 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 02 Nov 2017 03:38:38 -0700 (PDT) Subject: [pypy-commit] pypy canraise-assertionerror: branch to experiment with canraise not raising for assertion error Message-ID: <59faf5ae.048e1c0a.46413.a1dc@mx.google.com> Author: fijal Branch: canraise-assertionerror Changeset: r92903:904915e5425e Date: 2017-11-02 11:37 +0100 http://bitbucket.org/pypy/pypy/changeset/904915e5425e/ Log: branch to experiment with canraise not raising for assertion error diff --git a/rpython/translator/backendopt/canraise.py b/rpython/translator/backendopt/canraise.py --- a/rpython/translator/backendopt/canraise.py +++ b/rpython/translator/backendopt/canraise.py @@ -1,6 +1,9 @@ from rpython.rtyper.lltypesystem.lloperation import LL_OPERATIONS +from rpython.rtyper.lltypesystem import lltype +from rpython.rtyper import rclass from rpython.tool.ansi_print import AnsiLogger from rpython.translator.backendopt import graphanalyze +from rpython.flowspace import model as flowmodel log = AnsiLogger("canraise") @@ -8,6 +11,14 @@ class RaiseAnalyzer(graphanalyze.BoolGraphAnalyzer): ignore_exact_class = None + def __init__(self, translator): + 
graphanalyze.BoolGraphAnalyzer.__init__(self, translator) + ed = translator.rtyper.exceptiondata + self.ll_assert_error = ed.get_standard_ll_exc_instance_by_class( + AssertionError) + self.ll_not_impl_error = ed.get_standard_ll_exc_instance_by_class( + NotImplementedError) + def do_ignore_memory_error(self): self.ignore_exact_class = MemoryError @@ -25,6 +36,12 @@ analyze_exceptblock = None # don't call this def analyze_exceptblock_in_graph(self, graph, block, seen=None): + def producer(block, v): + for op in block.operations: + if op.result is v: + return op + assert False + if self.ignore_exact_class is not None: from rpython.translator.backendopt.ssa import DataFlowFamilyBuilder dff = DataFlowFamilyBuilder(graph) @@ -38,7 +55,34 @@ # it doesn't count. We'll see the place that really # raises the exception in the first place. return False - return True + # find all the blocks leading to the raise block + blocks = [] + for candidate in graph.iterblocks(): + if len(candidate.exits) != 1: + continue + if candidate.exits[0].target is block: + blocks.append(candidate) + ignored = 0 + import pdb + pdb.set_trace() + for preblock in blocks: + exc_val = preblock.exits[0].args[1] + if isinstance(exc_val, flowmodel.Constant): + exc = exc_val.value + else: + # find the producer + op = producer(preblock, exc_val) + if op.opname == 'cast_pointer': + exc_val = op.args[0] + op = producer(preblock, exc_val) + if op.opname != 'same_as': + # something strange, return True + return True + exc = op.args[0].value + p = lltype.cast_pointer(rclass.OBJECTPTR, exc) + if p == self.ll_assert_error or p == self.ll_not_impl_error: + ignored += 1 + return ignored < len(blocks) # backward compatible interface def can_raise(self, op, seen=None): diff --git a/rpython/translator/backendopt/test/test_canraise.py b/rpython/translator/backendopt/test/test_canraise.py --- a/rpython/translator/backendopt/test/test_canraise.py +++ b/rpython/translator/backendopt/test/test_canraise.py @@ -1,7 +1,8 @@ +from rpython.conftest import option +from rpython.rtyper.lltypesystem import rffi from rpython.translator.translator import TranslationContext, graphof from rpython.translator.backendopt.canraise import RaiseAnalyzer from rpython.translator.backendopt.all import backend_optimizations -from rpython.conftest import option class TestCanRaise(object): def translate(self, func, sig): @@ -253,3 +254,48 @@ ra.do_ignore_memory_error() # but it's potentially a KeyError result = ra.analyze_direct_call(graphof(t, h)) assert result + + def test_charp2str(self): + def f(a): + return len(rffi.charp2str(a)) + + t, ra = self.translate(f, [rffi.CCHARP]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError + + def test_calls_raise_not_impl(self): + def raising(): + raise NotImplementedError + + def not_raising(): + pass + + def f(a): + if a == 15: + raising() + else: + not_raising() + + t, ra = self.translate(f, [int]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError + + def test_calls_raise_assertion_error(self): + def raising(): + assert False + + def not_raising(): + pass + + def f(a): + if a == 15: + raising() + else: + not_raising() + + t, ra = self.translate(f, [int]) + ra.do_ignore_memory_error() + result = ra.analyze_direct_call(graphof(t, f)) + assert not result # ignore AssertionError From pypy.commits at gmail.com Thu Nov 2 07:03:14 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 02 Nov 
2017 04:03:14 -0700 (PDT) Subject: [pypy-commit] pypy default: the .debug files don't need to be executable, remove the 'x' bit Message-ID: <59fafb72.88c5df0a.9d3a1.ed36@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92904:84a682f2e00a Date: 2017-11-02 11:59 +0100 http://bitbucket.org/pypy/pypy/changeset/84a682f2e00a/ Log: the .debug files don't need to be executable, remove the 'x' bit diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py --- a/pypy/tool/release/smartstrip.py +++ b/pypy/tool/release/smartstrip.py @@ -19,6 +19,9 @@ if sys.platform == 'linux2': os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + perm = debug.stat().mode + perm &= ~(0111) # remove the 'x' bit + debug.chmod(perm) def smartstrip(exe, keep_debug=True): exe = py.path.local(exe) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py --- a/pypy/tool/release/test/test_smartstrip.py +++ b/pypy/tool/release/test/test_smartstrip.py @@ -42,6 +42,9 @@ smartstrip(exe, keep_debug=True) debug = tmpdir.join("myprog.debug") assert debug.check(file=True) + perm = debug.stat().mode & 0777 + assert perm & 0111 == 0 # 'x' bit not set + # info = info_symbol(exe, "foo") assert info == "foo in section .text of %s" % exe # From pypy.commits at gmail.com Thu Nov 2 07:03:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 02 Nov 2017 04:03:16 -0700 (PDT) Subject: [pypy-commit] pypy default: make sure to extract also the .debug files from the nightly Message-ID: <59fafb74.4f931c0a.e68c0.dd68@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92905:060286c2177e Date: 2017-11-02 12:02 +0100 http://bitbucket.org/pypy/pypy/changeset/060286c2177e/ Log: make sure to extract also the .debug files from the nightly diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): From pypy.commits at gmail.com Thu Nov 2 11:06:00 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:00 -0700 (PDT) Subject: [pypy-commit] pypy bsd-patches: close branch to be merged Message-ID: <59fb3458.09a0df0a.46c79.5dd4@mx.google.com> Author: Matti Picus Branch: bsd-patches Changeset: r92907:5d3a56b266eb Date: 2017-11-02 16:58 +0200 http://bitbucket.org/pypy/pypy/changeset/5d3a56b266eb/ Log: close branch to be merged From pypy.commits at gmail.com Thu Nov 2 11:06:02 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:02 -0700 (PDT) Subject: [pypy-commit] pypy default: merge bsd-patches which fixes failures on FreeBSD (David Naylor) Message-ID: <59fb345a.14a1df0a.783a7.1943@mx.google.com> Author: Matti Picus Branch: Changeset: r92908:959da02fe2dc Date: 2017-11-02 16:59 +0200 http://bitbucket.org/pypy/pypy/changeset/959da02fe2dc/ Log: merge bsd-patches which fixes failures on FreeBSD (David Naylor) diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = 
_ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -107,6 +108,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ 
b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + "'freebsd' in sys.platform") def test_raw_input(): import os import pty diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -244,13 +244,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? 
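(For illustration only: a minimal usage sketch of the ctypes.CDLL(..., handle=n) path merged above. The library name assumes a Linux libc and is not taken from the patch; on CPython the same keyword argument behaves the same way.)

    import ctypes

    libc = ctypes.CDLL("libc.so.6")                        # first load: dlopen() happens here
    alias = ctypes.CDLL("libc.so.6", handle=libc._handle)  # reuse the already-open handle
    assert alias.strlen(b"pypy") == 4                      # symbols still resolve normally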
standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Thu Nov 2 11:05:58 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:05:58 -0700 (PDT) Subject: [pypy-commit] pypy default: help tests find msv compiler Message-ID: <59fb3456.cc091c0a.9de42.6f6d@mx.google.com> Author: Matti Picus Branch: Changeset: r92906:2177b95b1174 Date: 2017-11-02 16:48 +0200 http://bitbucket.org/pypy/pypy/changeset/2177b95b1174/ Log: help tests find msv compiler diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) From pypy.commits at gmail.com Thu Nov 2 11:06:04 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 02 Nov 2017 08:06:04 -0700 (PDT) Subject: [pypy-commit] pypy default: document merge branches Message-ID: <59fb345c.86081c0a.3b4f8.59cf@mx.google.com> Author: Matti Picus Branch: Changeset: r92909:8ee02499c7fd Date: 2017-11-02 17:08 +0200 http://bitbucket.org/pypy/pypy/changeset/8ee02499c7fd/ Log: document merge branches diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,10 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. 
branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) From pypy.commits at gmail.com Thu Nov 2 12:34:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 02 Nov 2017 09:34:24 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: Add extra_tests/requirements.txt Message-ID: <59fb4910.d08edf0a.9c5a8.0d80@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92910:c0b920761a24 Date: 2017-11-02 16:33 +0000 http://bitbucket.org/pypy/pypy/changeset/c0b920761a24/ Log: Add extra_tests/requirements.txt diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis From pypy.commits at gmail.com Thu Nov 2 13:38:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 02 Nov 2017 10:38:53 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: Add a failing test Message-ID: <59fb582d.028b1c0a.476b5.1747@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92911:1dc82bde8716 Date: 2017-11-02 17:38 +0000 http://bitbucket.org/pypy/pypy/changeset/1dc82bde8716/ Log: Add a failing test diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_failing.py @@ -0,0 +1,8 @@ +from hypothesis import given, strategies + +def mean(a, b): + return (a + b)/2. + + at given(strategies.integers(), strategies.integers()) +def test_mean_failing(a, b): + assert mean(a, b) >= min(a, b) From pypy.commits at gmail.com Fri Nov 3 09:39:01 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:39:01 -0700 (PDT) Subject: [pypy-commit] pypy matplotlib: remove array-to-obj conversion, do it in matplotlib instead Message-ID: <59fc7175.a1b6500a.cf022.23b0@mx.google.com> Author: Matti Picus Branch: matplotlib Changeset: r92913:cfbf50f45366 Date: 2017-11-03 14:19 +0200 http://bitbucket.org/pypy/pypy/changeset/cfbf50f45366/ Log: remove array-to-obj conversion, do it in matplotlib instead diff --git a/lib_pypy/_tkinter/tclobj.py b/lib_pypy/_tkinter/tclobj.py --- a/lib_pypy/_tkinter/tclobj.py +++ b/lib_pypy/_tkinter/tclobj.py @@ -2,11 +2,6 @@ from .tklib_cffi import ffi as tkffi, lib as tklib import binascii -try: - import numpy as np - hasNumpy = True -except ImportError: - hasNumpy = False class TypeCache(object): def __init__(self): @@ -91,29 +86,6 @@ finally: tklib.mp_clear(bigValue) -def AsObjNDArray(value): - # XXX there must be a better way - argv = tkffi.new("Tcl_Obj*[]", 3) - argv[0] = AsObj(' '.join([str(x) for x in value.shape])) - argv[1] = AsObj(value.dtype.str) - asstr = value.tostring() - argv[2] = AsObj(binascii.b2a_hex(asstr)) - return tklib.Tcl_NewListObj(3, argv) - -def FromTclStringNDArray(data): - # unconvert data, assuming it is stringified from AsObjNDArray - indx1 = data.find(b'}') - shape = map(int, data[1:indx1].split()) - size = np.prod(shape) - indx2 = data.find(b' ', indx1 + 2) - dtype = np.dtype(data[indx1 + 2:indx2]) - start = indx2+1 - stop = start + size * dtype.itemsize * 2 - if stop > len(data): - raise ValueError('data too short') - vals = binascii.a2b_hex(data[start:stop]) - return np.fromstring(vals, dtype=dtype).reshape(shape) - def FromObj(app, value): """Convert a TclObj pointer into a Python object.""" typeCache = app._typeCache @@ -202,9 +174,6 @@ if isinstance(value, TclObject): tklib.Tcl_IncrRefCount(value._value) return value._value - if hasNumpy and 
isinstance(value, np.ndarray): - return AsObjNDArray(value) - return AsObj(str(value)) class TclObject(object): From pypy.commits at gmail.com Fri Nov 3 09:39:03 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:39:03 -0700 (PDT) Subject: [pypy-commit] pypy default: rename uu to something more unique, maybe fixes tests? (arigato) Message-ID: <59fc7177.bbb7500a.54de0.dddc@mx.google.com> Author: Matti Picus Branch: Changeset: r92914:5c8b7f2cd6b7 Date: 2017-11-03 15:38 +0200 http://bitbucket.org/pypy/pypy/changeset/5c8b7f2cd6b7/ Log: rename uu to something more unique, maybe fixes tests? (arigato) diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -194,9 +194,9 @@ m0 = ord(self.test_pyc[0]) m0 ^= 0x04 test_pyc = chr(m0) + self.test_pyc[1:] - self.writefile("uu.pyc", test_pyc) + self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, - "__import__('uu', globals(), locals(), [])") + "__import__('xxbad_pyc', globals(), locals(), [])") assert 'uu' not in sys.modules def test_force_py(self): @@ -204,9 +204,9 @@ m0 = ord(self.test_pyc[0]) m0 ^= 0x04 test_pyc = chr(m0) + self.test_pyc[1:] - self.writefile("uu.pyc", test_pyc) - self.writefile("uu.py", "def f(x): return x") - mod = __import__("uu", globals(), locals(), []) + self.writefile("xxforce_py.pyc", test_pyc) + self.writefile("xxforce_py.py", "def f(x): return x") + mod = __import__("xxforce_py", globals(), locals(), []) assert mod.f(3) == 3 def test_sys_modules(self): From pypy.commits at gmail.com Fri Nov 3 09:38:59 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 06:38:59 -0700 (PDT) Subject: [pypy-commit] pypy matplotlib: merge default into branch Message-ID: <59fc7173.e184500a.38188.b627@mx.google.com> Author: Matti Picus Branch: matplotlib Changeset: r92912:112fed2c005c Date: 2017-11-02 18:27 +0200 http://bitbucket.org/pypy/pypy/changeset/112fed2c005c/ Log: merge default into branch diff too long, truncating to 2000 out of 4915 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + 
self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self 
+ for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git 
a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py
--- a/lib_pypy/_testcapi.py
+++ b/lib_pypy/_testcapi.py
@@ -16,4 +16,10 @@
     with fp:
         imp.load_module('_testcapi', fp, filename, description)
 except ImportError:
+    if os.name == 'nt':
+        # hack around finding compilers on win32
+        try:
+            import setuptools
+        except ImportError:
+            pass
     _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir)
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -182,6 +182,57 @@
 technical difficulties.


+What about numpy, numpypy, micronumpy?
+--------------------------------------
+
+Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It
+has two pieces:
+
+ * the builtin module :source:`pypy/module/micronumpy`: this is written in
+   RPython and roughly covers the content of the ``numpy.core.multiarray``
+   module. Confusingly enough, this is available in PyPy under the name
+   ``_numpypy``. It is included by default in all the official releases of
+   PyPy (but it might be dropped in the future).
+
+ * a fork_ of the official numpy repository maintained by us and informally
+   called ``numpypy``: even more confusing, the name of the repo on bitbucket
+   is ``numpy``. The main difference with the upstream numpy is that it is
+   based on the micronumpy module written in RPython, instead of
+   ``numpy.core.multiarray`` which is written in C.
+
+Moreover, it is also possible to install the upstream version of ``numpy``:
+its core is written in C and it runs on PyPy under the cpyext compatibility
+layer. This is what you get if you do ``pypy -m pip install numpy``.
+
+
+Should I install numpy or numpypy?
+-----------------------------------
+
+TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip
+install numpy``. You might also be interested in using the experimental `PyPy
+binary wheels`_ to save compilation time.
+
+The upstream ``numpy`` is written in C, and runs under the cpyext
+compatibility layer. Nowadays, cpyext is mature enough that you can simply
+use the upstream ``numpy``, since it passes 99.9% of the test suite. At the
+moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext
+is infamously slow, and thus it has worse performance compared to
+``numpypy``. However, we are actively working on improving it, as we expect to
+reach the same speed, eventually.
+
+On the other hand, ``numpypy`` is more JIT-friendly and very fast to call,
+since it is written in RPython: but it is a reimplementation, and it's hard to
+be completely compatible: over the years the project slowly matured and
+eventually it was able to call out to the LAPACK and BLAS libraries to speed
+matrix calculations, and reached around an 80% parity with the upstream
+numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is
+progressing fast, we have discontinued support for ``numpypy``.
+
+.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html
+.. _fork: https://bitbucket.org/pypy/numpy
+.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels
+
+
 Is PyPy more clever than CPython about Tail Calls?
-------------------------------------------------- diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,4 +5,15 @@ .. this is a revision shortly after release-pypy2.7-v5.9.0 .. startrev:d56dadcef996 +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,20 +66,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - 
return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, 
arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. -def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 
'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import 
interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy 
ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from 
pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more 
lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" 
@@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) 
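The interp_cppyy.py hunks around here fold the old separate `isref`/`python_owns` booleans of W_CPPClass into a single `flags` field, tested against the INSTANCE_FLAGS_* constants added near the top of the module, with the new `move()` entry point setting the r-value bit. The snippet below is only a minimal, self-contained sketch of that bit-flag idea using the same constant values; the `Instance` class and the standalone `move()` helper are hypothetical stand-ins for illustration, not the real W_CPPClass or `_cppyy.move`:

    # Illustrative sketch only: same INSTANCE_FLAGS_* values as in the diff,
    # but `Instance` is a made-up stand-in, not the real W_CPPClass.
    INSTANCE_FLAGS_PYTHON_OWNS = 0x0001
    INSTANCE_FLAGS_IS_REF      = 0x0002
    INSTANCE_FLAGS_IS_R_VALUE  = 0x0004

    class Instance(object):
        def __init__(self, isref=False, python_owns=False):
            # fold the former `isref`/`python_owns` booleans into one bit field
            self.flags = 0
            if isref:
                self.flags |= INSTANCE_FLAGS_IS_REF
            if python_owns:
                self.flags |= INSTANCE_FLAGS_PYTHON_OWNS

        @property
        def python_owns(self):
            return bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)

        @python_owns.setter
        def python_owns(self, value):
            if value:
                self.flags |= INSTANCE_FLAGS_PYTHON_OWNS
            else:
                self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS

    def move(obj):
        # same idea as _cppyy.move(): tag the instance as a C++-style rvalue
        obj.flags |= INSTANCE_FLAGS_IS_R_VALUE
        return obj

    obj = move(Instance(python_owns=True))
    assert obj.python_owns
    assert obj.flags & INSTANCE_FLAGS_IS_R_VALUE

One flag word keeps the RPython instance small while still letting converters such as InstanceMoveConverter test and clear the r-value bit in a single operation.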
@@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, - "__new__" : make_new(class_name), + "__dispatch__" : dispatch, } # insert (static) methods into the class dictionary - for name in decl.get_method_names(): - cppol = decl.get_overload(name) + for m_name in decl.get_method_names(): + cppol = decl.get_overload(m_name) if cppol.is_static(): - d_class[name] = make_static_function(name, cppol) + d_class[m_name] = make_static_function(m_name, cppol) else: - d_class[name] = make_method(name, cppol) + d_class[m_name] = make_method(m_name, cppol) # add all data members to the dictionary of the class to be created, and # static ones also to the metaclass (needed for property setters) - for name in decl.get_datamember_names(): - cppdm = decl.get_datamember(name) - d_class[name] = cppdm + for d_name in decl.get_datamember_names(): + cppdm = decl.get_datamember(d_name) + d_class[d_name] = cppdm if cppdm.is_static(): - d_meta[name] = cppdm + d_meta[d_name] = cppdm # create a metaclass to allow properties (for static data write access) metabases = [type(base) for base in bases] - metacpp = type(CPPMetaScope)(class_name+'_meta', _drop_cycles(metabases), d_meta) + metacpp = type(CPPMetaScope)(cl_name+'_meta', _drop_cycles(metabases), d_meta) # create the python-side C++ class - pycls = metacpp(class_name, _drop_cycles(bases), d_class) + pycls = metacpp(cl_name, _drop_cycles(bases), d_class) # store the class on its outer scope - setattr(scope, final_class_name, pycls) + setattr(scope, cl_name, pycls) # the call to register will add back-end specific pythonizations and thus # needs to run first, so that the generic 
pythonizations can use them @@ -192,32 +233,32 @@ return CPPTemplate(template_name, scope) -def get_pycppitem(scope, name): +def get_scoped_pycppitem(scope, name): import _cppyy - # resolve typedefs/aliases - full_name = (scope == gbl) and name or (scope.__name__+'::'+name) - true_name = _cppyy._resolve_name(full_name) - if true_name != full_name: - return get_pycppclass(true_name) + # resolve typedefs/aliases: these may cross namespaces, in which case + # the lookup must trigger the creation of all necessary scopes + scoped_name = (scope == gbl) and name or (scope.__cppname__+'::'+name) + final_scoped_name = _cppyy._resolve_name(scoped_name) + if final_scoped_name != scoped_name: + pycppitem = get_pycppitem(final_scoped_name) + # also store on the requested scope (effectively a typedef or pointer copy) + setattr(scope, name, pycppitem) + return pycppitem pycppitem = None - # classes - cppitem = _cppyy._scope_byname(true_name) + # scopes (classes and namespaces) + cppitem = _cppyy._scope_byname(final_scoped_name) if cppitem: - name = true_name - if scope != gbl: - name = true_name[len(scope.__cppname__)+2:] if cppitem.is_namespace(): pycppitem = make_cppnamespace(scope, name, cppitem) - setattr(scope, name, pycppitem) else: - pycppitem = make_cppclass(scope, name, true_name, cppitem) + pycppitem = make_cppclass(scope, name, cppitem) # templates if not cppitem: - cppitem = _cppyy._template_byname(true_name) + cppitem = _cppyy._is_template(final_scoped_name) if cppitem: pycppitem = make_cpptemplatetype(scope, name) setattr(scope, name, pycppitem) @@ -249,29 +290,6 @@ raise AttributeError("'%s' has no attribute '%s'" % (str(scope), name)) -def scope_splitter(name): - is_open_template, scope = 0, "" - for c in name: - if c == ':' and not is_open_template: - if scope: - yield scope - scope = "" - continue - elif c == '<': - is_open_template += 1 - elif c == '>': - is_open_template -= 1 - scope += c - yield scope - -def get_pycppclass(name): - # break up the name, to walk the scopes and get the class recursively - scope = gbl - for part in scope_splitter(name): - scope = getattr(scope, part) - return scope - - # pythonization by decoration (move to their own file?) 
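For context on the scope_splitter generator shown earlier in this diff: it tokenizes a fully scoped C++ name on '::' while leaving anything inside template brackets untouched, so nested scopes can be walked one component at a time. A standalone sketch of the same splitting idea, runnable outside of _cppyy (the example name below is invented for illustration):

    def scope_splitter(name):
        # yield the successive scope components of a C++ name; '::' separates
        # components, but colons inside template brackets are kept verbatim
        is_open_template, scope = 0, ""
        for c in name:
            if c == ':' and not is_open_template:
                if scope:
                    yield scope
                    scope = ""
                continue
            elif c == '<':
                is_open_template += 1
            elif c == '>':
                is_open_template -= 1
            scope += c
        yield scope

    assert list(scope_splitter('std::map<std::string,int>::iterator')) == \
           ['std', 'map<std::string,int>', 'iterator']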
def python_style_getitem(self, idx): # python-style indexing: check for size and allow indexing from the back @@ -346,8 +364,8 @@ # also the fallback on the indexed __getitem__, but that is slower) if not 'vector' in pyclass.__name__[:11] and \ ('begin' in pyclass.__dict__ and 'end' in pyclass.__dict__): - if _cppyy._scope_byname(pyclass.__name__+'::iterator') or \ - _cppyy._scope_byname(pyclass.__name__+'::const_iterator'): + if _cppyy._scope_byname(pyclass.__cppname__+'::iterator') or \ + _cppyy._scope_byname(pyclass.__cppname__+'::const_iterator'): def __iter__(self): i = self.begin() while i != self.end(): @@ -416,17 +434,21 @@ # pre-create std to allow direct importing gbl.std = make_cppnamespace(gbl, 'std', _cppyy._scope_byname('std')) + # add move cast + gbl.std.move = _cppyy.move + # install a type for enums to refer to # TODO: this is correct for C++98, not for C++11 and in general there will # be the same issue for all typedef'd builtin types setattr(gbl, 'internal_enum_type_t', int) - # install nullptr as a unique reference - setattr(gbl, 'nullptr', _cppyy._get_nullptr()) From pypy.commits at gmail.com Fri Nov 3 10:35:28 2017 From: pypy.commits at gmail.com (stian) Date: Fri, 03 Nov 2017 07:35:28 -0700 (PDT) Subject: [pypy-commit] pypy math-improvements: Speed up division slightly Message-ID: <59fc7eb0.6896df0a.2c39d.62db@mx.google.com> Author: stian Branch: math-improvements Changeset: r92915:28ef9f10c404 Date: 2017-11-03 15:34 +0100 http://bitbucket.org/pypy/pypy/changeset/28ef9f10c404/ Log: Speed up division slightly diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2168,12 +2168,13 @@ if j >= size_v: vtop = 0 else: - vtop = v.widedigit(j) - assert vtop <= wm1 - vv = (vtop << SHIFT) | v.widedigit(abs(j-1)) + vtop = v.widedigit(j) << SHIFT + #assert vtop <= wm1 + vv = vtop | v.widedigit(abs(j-1)) q = vv / wm1 - r = vv - wm1 * q - while wm2 * q > ((r << SHIFT) | v.widedigit(abs(j-2))): + r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. 
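For readers not deep in rbigint: the hunk above touches the quotient-estimation step of Knuth-style long division, where a trial digit is guessed from the top two digits of the dividend and the top digit of the (normalised) divisor, then corrected downwards at most twice. The change keeps the same estimate but obtains the remainder with a modulo instead of recomputing vv - wm1 * q, which the in-line comment above notes seems slightly faster on wide digits. A self-contained sketch of the estimate-and-correct step in plain Python -- the tiny digit width and the digit values are invented for illustration, this is not the rbigint code itself:

    SHIFT = 4   # pretend digits are base 16, just to keep the numbers readable

    def estimate_quotient_digit(v2, v1, v0, w1, w0):
        # v2, v1, v0: top three digits of the dividend prefix
        # w1, w0:     top two digits of the divisor, with w1 >= base // 2
        vv = (v2 << SHIFT) | v1        # two-digit prefix of the dividend
        q, r = divmod(vv, w1)          # trial digit and remainder in one step
        # the trial digit can overshoot by at most 2; correct it downwards
        while w0 * q > ((r << SHIFT) | v0):
            q -= 1
            r += w1
        return q

    # 0x8C5 // 0x97 is 0xE: the estimate starts at 0xF and is corrected once
    assert estimate_quotient_digit(0x8, 0xC, 0x5, 0x9, 0x7) == 0xE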
+ vj2 = v.widedigit(abs(j-2)) + while wm2 * q > ((r << SHIFT) | vj2): q -= 1 r += wm1 From pypy.commits at gmail.com Fri Nov 3 11:12:23 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 03 Nov 2017 08:12:23 -0700 (PDT) Subject: [pypy-commit] pypy default: fix tests Message-ID: <59fc8757.d0e61c0a.917c0.f6d3@mx.google.com> Author: Matti Picus Branch: Changeset: r92916:503b1a72abab Date: 2017-11-03 17:11 +0200 http://bitbucket.org/pypy/pypy/changeset/503b1a72abab/ Log: fix tests diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -213,8 +213,9 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - for source, target in binaries: - smartstrip(bindir.join(target), keep_debug=options.keep_debug) + if not _fake: + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile From pypy.commits at gmail.com Fri Nov 3 11:59:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 08:59:53 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Merged in nanjekye/pypy/os_lockf (pull request #575) Message-ID: <59fc9279.28361c0a.a0b50.337f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92920:1214e3588b0f Date: 2017-11-03 15:59 +0000 http://bitbucket.org/pypy/pypy/changeset/1214e3588b0f/ Log: Merged in nanjekye/pypy/os_lockf (pull request #575) lockf posix attribute diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -237,9 +237,15 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if sys.platform.startswith('linux'): + interpleveldefs['lockf'] = 'interp_posix.lockf' + for _name in ['F_LOCK', 'F_TLOCK', 'F_ULOCK', 'F_TEST']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'sched_yield'): interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' - + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2469,6 +2469,19 @@ else: return space.newint(s) + at unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) +def lockf(space, fd, cmd, length): + """apply, test or remove a POSIX lock on an + open file. 
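As a usage note for the posix.lockf() binding being added in the hunk above (it mirrors CPython's os.lockf(fd, cmd, len) together with the F_LOCK/F_TLOCK/F_ULOCK/F_TEST command constants), here is a minimal sketch on a POSIX system, following the same pattern as the test_os_lockf test added in this pull request; the file name and the 4-byte lock length are arbitrary:

    import os

    fd = os.open("lockf_demo.txt", os.O_WRONLY | os.O_CREAT)
    try:
        os.write(fd, b"test")
        os.lseek(fd, 0, 0)
        os.lockf(fd, os.F_LOCK, 4)      # lock the first four bytes (blocking)
        os.lockf(fd, os.F_ULOCK, 4)     # and release them again
    finally:
        os.close(fd)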
+ """ + while True: + try: + s = rposix.lockf(fd, cmd, length) + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(s) + def sched_yield(space): """ Voluntarily relinquish the CPU""" while True: diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1337,6 +1337,17 @@ posix.close(fd) s2.close() s1.close() + + def test_os_lockf(self): + posix, os = self.posix, self.os + fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + posix.lockf(fd, posix.F_LOCK, 4) + posix.lockf(fd, posix.F_ULOCK, 4) + finally: + os.close(fd) def test_urandom(self): os = self.posix diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -816,3 +816,14 @@ if sys.platform != 'win32': rposix.sched_yield() + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Fri Nov 3 12:00:02 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:02 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: lockf posixattributes Message-ID: <59fc9282.4d051c0a.bb8f7.36d8@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92917:287c9946859b Date: 2017-10-28 22:59 +0300 http://bitbucket.org/pypy/pypy/changeset/287c9946859b/ Log: lockf posixattributes diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -237,6 +237,12 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if sys.platform.startswith('linux'): + interpleveldefs['lockf'] = 'interp_posix.lockf' + for _name in ['F_LOCK', 'F_TLOCK', 'F_ULOCK', 'F_TEST']: + if getattr(rposix, _name) is not None: + interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) 
is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2468,3 +2468,16 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(s) + + at unwrap_spec(fd=c_int, cmd=c_int, length=r_longlong) +def lockf(space, fd, cmd, length): + """apply, test or remove a POSIX lock on an + open file. + """ + while True: + try: + s = rposix.lockf(fd, cmd, length) + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(s) diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -1331,6 +1331,17 @@ posix.close(fd) s2.close() s1.close() + + def test_os_lockf(self): + posix, os = self.posix, self.os + fd = os.open(self.path2 + 'test_os_lockf', os.O_WRONLY | os.O_CREAT) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + posix.lockf(fd, posix.F_LOCK, 4) + posix.lockf(fd, posix.F_ULOCK, 4) + finally: + os.close(fd) def test_urandom(self): os = self.posix diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -811,3 +811,14 @@ assert isinstance(high, int) == True assert high > low + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Fri Nov 3 12:00:06 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:06 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: merge conflict Message-ID: <59fc9286.46901c0a.da0ea.e0dc@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92918:f3f07f772e02 Date: 2017-10-30 12:57 +0300 http://bitbucket.org/pypy/pypy/changeset/f3f07f772e02/ Log: merge conflict diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -75,6 +75,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ 
^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/lib-python/3/ctypes/test/test_bitfields.py b/lib-python/3/ctypes/test/test_bitfields.py --- a/lib-python/3/ctypes/test/test_bitfields.py +++ b/lib-python/3/ctypes/test/test_bitfields.py @@ -1,5 +1,5 @@ from ctypes import * -from ctypes.test import need_symbol +from ctypes.test import need_symbol, xfail import unittest import os @@ -279,6 +279,7 @@ self.assertEqual(b, b'\xef\xcd\xab\x21') @need_symbol('c_uint32') + @xfail def test_uint32_swap_big_endian(self): # Issue #23319 class Big(BigEndianStructure): diff --git a/lib-python/3/ctypes/test/test_byteswap.py b/lib-python/3/ctypes/test/test_byteswap.py --- a/lib-python/3/ctypes/test/test_byteswap.py +++ b/lib-python/3/ctypes/test/test_byteswap.py @@ -2,6 +2,7 @@ from binascii import hexlify from ctypes import * +from test.support import impl_detail def bin(s): return hexlify(memoryview(s)).decode().upper() @@ -22,6 +23,7 @@ setattr(bits, "i%s" % i, 1) dump(bits) + @impl_detail("slots are irrelevant on PyPy", pypy=False) def test_slots(self): class BigPoint(BigEndianStructure): __slots__ = () diff --git a/lib-python/3/ctypes/test/test_frombuffer.py b/lib-python/3/ctypes/test/test_frombuffer.py --- a/lib-python/3/ctypes/test/test_frombuffer.py +++ b/lib-python/3/ctypes/test/test_frombuffer.py @@ -85,7 +85,6 @@ del a gc.collect() # Should not crash - @xfail def test_from_buffer_copy(self): a = array.array("i", range(16)) x = (c_int * 16).from_buffer_copy(a) diff --git a/lib-python/3/test/test_bytes.py b/lib-python/3/test/test_bytes.py --- a/lib-python/3/test/test_bytes.py +++ b/lib-python/3/test/test_bytes.py @@ -721,9 +721,12 @@ self.assertIs(type(BytesSubclass(A())), BytesSubclass) # Test PyBytes_FromFormat() - @test.support.impl_detail("don't test cpyext here") def test_from_format(self): test.support.import_module('ctypes') + try: + from ctypes import pythonapi + except ImportError: + self.skipTest( "no pythonapi in ctypes") from ctypes import pythonapi, py_object, c_int, c_char_p PyBytes_FromFormat = pythonapi.PyBytes_FromFormat PyBytes_FromFormat.restype = py_object diff --git a/lib-python/3/test/test_unicode.py b/lib-python/3/test/test_unicode.py --- a/lib-python/3/test/test_unicode.py +++ b/lib-python/3/test/test_unicode.py @@ -2396,6 +2396,10 @@ # Test PyUnicode_FromFormat() def test_from_format(self): support.import_module('ctypes') + try: + from ctypes import pythonapi + except ImportError: + self.skipTest( "no pythonapi in ctypes") from ctypes import ( pythonapi, py_object, sizeof, c_int, c_long, c_longlong, c_ssize_t, diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,9 +8,14 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None return res - + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) subletter = getattr(res._type_, '_type_', None) if subletter == 'c': @@ -55,7 +60,7 @@ for i in range(len(val)): target[i] = val[i] if len(val) < self._length_: - target[len(val)] = '\x00' + target[len(val)] = u'\x00' res.value = property(getvalue, setvalue) res._ffishape_ = (ffiarray, res._length_) @@ -164,7 +169,7 @@ if letter == 'c': return 
b"".join(l) if letter == 'u': - return "".join(l) + return u"".join(l) return l class Array(_CData, metaclass=ArrayMeta): diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -165,6 +165,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -113,7 +113,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __bool__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/primitive.py b/lib_pypy/_ctypes/primitive.py --- a/lib_pypy/_ctypes/primitive.py +++ b/lib_pypy/_ctypes/primitive.py @@ -232,9 +232,6 @@ elif tp == 'u': def _setvalue(self, val): - if isinstance(val, bytes): - val = val.decode(ConvMode.encoding, ConvMode.errors) - # possible if we use 'ignore' if val: self._buffer[0] = val def _getvalue(self): @@ -243,8 +240,6 @@ elif tp == 'c': def _setvalue(self, val): - if isinstance(val, str): - val = val.encode(ConvMode.encoding, ConvMode.errors) if val: self._buffer[0] = val def _getvalue(self): diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -290,6 +290,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1080,21 +1080,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.11.1 +Version: 1.11.2 Summary: Foreign Function Interface for Python calling C code. 
Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.11.1" -__version_info__ = (1, 11, 1) +__version__ = "1.11.2" +__version_info__ = (1, 11, 2) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -238,9 +238,9 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else - return _cffi_from_c_wchar3216_t(x); + return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) @@ -254,7 +254,7 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -247,7 +247,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.11.1" + "\ncompiled with cffi version: 1.11.2" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/pypy/TODO b/pypy/TODO --- a/pypy/TODO +++ b/pypy/TODO @@ -1,18 +1,4 @@ -TODO for the python3 test suite: - -* test_memoryview - Needs bytes/str changes. Probably easy. Work for this has begun on - py3k-memoryview (by mjacob) https://bugs.pypy.org/issue1542 - -own-tests: - -* module/test_lib_pypy - These crash the buildbots (via SyntaxErrors): others were really - made to run under Python 2.x and so simply fail - -* module.cpyext.test.test_structseq test_StructSeq - structseq now subclasses tuple on py3, which breaks how - BaseCpyTypeDescr.realize allocates it +... antocuni's older TODO: @@ -20,14 +6,6 @@ * run coverage against the parser/astbuilder/astcompiler: it's probably full of dead code because the grammar changed -* re-enable strategies https://bugs.pypy.org/issue1540 : - - re-enable IntDictStrategy - - re-enable StdObjSpace.listview_str - - re-enable the kwargs dict strategy in dictmultiobject.py - - re-enable view_as_kwargs - -* unskip numpypy tests in module/test_lib_pypy/numpypy/ - * optimize W_UnicodeObject, right now it stores both an unicode and an utf8 version of the same string diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -119,7 +119,7 @@ To run untranslated tests, you need the Boehm garbage collector libgc. 
-On recent Debian and Ubuntu (like 17.04), this is the command to install +On recent Debian and Ubuntu (16.04 onwards), this is the command to install all build-time dependencies:: apt-get install gcc make libffi-dev pkg-config zlib1g-dev libbz2-dev \ @@ -127,7 +127,7 @@ tk-dev libgc-dev python-cffi \ liblzma-dev libncursesw5-dev # these two only needed on PyPy3 -On older Debian and Ubuntu (12.04 to 16.04):: +On older Debian and Ubuntu (12.04-14.04):: apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \ libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \ @@ -149,12 +149,23 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X, most of these build-time dependencies are installed alongside +On Mac OS X:: + +Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to find them you may need to run:: xcode-select --install +An exception is OpenSSL, which is no longer provided with the operating +system. It can be obtained via Homebrew (with ``$ brew install openssl``), +but it will not be available on the system path by default. The easiest +way to enable it for building pypy is to set an environment variable:: + + export PKG_CONFIG_PATH=$(brew --prefix)/opt/openssl/lib/pkgconfig + +After setting this, translation (described next) will find the OpenSSL libs +as expected. Run the translation ------------------- @@ -187,18 +198,18 @@ entire pypy interpreter. This step is currently singe threaded, and RAM hungry. As part of this step, the chain creates a large number of C code files and a Makefile to compile them in a - directory controlled by the ``PYPY_USESSION_DIR`` environment variable. + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. 2. Create an executable ``pypy-c`` by running the Makefile. This step can - utilize all possible cores on the machine. -3. Copy the needed binaries to the current directory. -4. Generate c-extension modules for any cffi-based stdlib modules. + utilize all possible cores on the machine. +3. Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. The resulting executable behaves mostly like a normal Python interpreter (see :doc:`cpython_differences`), and is ready for testing, for use as a base interpreter for a new virtualenv, or for packaging into a binary suitable for installation on another machine running the same OS as the build -machine. +machine. Note that step 4 is merely done as a convenience, any of the steps may be rerun without rerunning the previous steps. @@ -255,7 +266,7 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in + commands at install time; the exact list is in :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. 
It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/test/test_whatsnew.py b/pypy/doc/test/test_whatsnew.py --- a/pypy/doc/test/test_whatsnew.py +++ b/pypy/doc/test/test_whatsnew.py @@ -89,7 +89,7 @@ startrev, documented = parse_doc(last_whatsnew) merged, branch = get_merged_branches(ROOT, startrev, '') merged.discard('default') - merged.discard('py3k') + merged.discard('py3.5') merged.discard('') not_documented = merged.difference(documented) not_merged = documented.difference(merged) @@ -100,7 +100,7 @@ print '\n'.join(not_merged) print assert not not_documented - if branch == 'py3k': + if branch == 'py3.5': assert not not_merged else: assert branch in documented, 'Please document this branch before merging: %s' % branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -5,4 +5,8 @@ .. this is a revision shortly after release-pypy2.7-v5.9.0 .. startrev:d56dadcef996 +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging +.. branch: docs-osx-brew-openssl + diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -4,8 +4,3 @@ .. this is the revision after release-pypy3.5-5.9 .. startrev: be41e3ac0a29 - -.. branch: multiphase - -Implement PyType_FromSpec (PEP 384) and fix issues with PEP 489 support. 
- diff --git a/pypy/interpreter/test/test_interpreter.py b/pypy/interpreter/test/test_interpreter.py --- a/pypy/interpreter/test/test_interpreter.py +++ b/pypy/interpreter/test/test_interpreter.py @@ -1,7 +1,6 @@ import py import sys from pypy.interpreter import gateway, module, error -from hypothesis import given, strategies class TestInterpreter: @@ -300,30 +299,6 @@ assert "TypeError:" in res assert "'tuple' object is not a mapping" in res - @given(strategies.lists(strategies.one_of(strategies.none(), - strategies.lists(strategies.none())))) - def test_build_map_order(self, shape): - value = [10] - def build_expr(shape): - if shape is None: - value[0] += 1 - return '0: %d' % value[0] - else: - return '**{%s}' % (', '.join( - [build_expr(shape1) for shape1 in shape]),) - - expr = build_expr(shape)[2:] - code = """ - def f(): - return %s - """ % (expr, ) - res = self.codetest(code, 'f', []) - if value[0] == 10: - expected = {} - else: - expected = {0: value[0]} - assert res == expected, "got %r for %r" % (res, expr) - def test_build_map_unpack_with_call(self): code = """ def f(a,b,c,d): @@ -348,6 +323,36 @@ assert "TypeError:" in resg4 assert "got multiple values for keyword argument 'a'" in resg4 +try: + from hypothesis import given, strategies +except ImportError: + pass +else: + class TestHypothesisInterpreter(TestInterpreter): + @given(strategies.lists(strategies.one_of(strategies.none(), + strategies.lists(strategies.none())))) + def test_build_map_order(self, shape): + value = [10] + def build_expr(shape): + if shape is None: + value[0] += 1 + return '0: %d' % value[0] + else: + return '**{%s}' % (', '.join( + [build_expr(shape1) for shape1 in shape]),) + + expr = build_expr(shape)[2:] + code = """ + def f(): + return %s + """ % (expr, ) + res = self.codetest(code, 'f', []) + if value[0] == 10: + expected = {} + else: + expected = {0: value[0]} + assert res == expected, "got %r for %r" % (res, expr) + class AppTestInterpreter: def test_trivial(self): diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -3,7 +3,7 @@ from rpython.rlib import rdynload, clibffi from rpython.rtyper.lltypesystem import rffi -VERSION = "1.11.1" +VERSION = "1.11.2" FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI try: diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1,7 +1,7 @@ # ____________________________________________________________ import sys -assert __version__ == "1.11.1", ("This test_c.py file is for testing a version" +assert __version__ == "1.11.2", ("This test_c.py file is for testing a version" " of cffi that differs from the one that we" " get from 'import _cffi_backend'") if sys.version_info < (3,): diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -762,9 +762,16 @@ assert s == b'\xe9' def test_lone_surrogates(self): - for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', - 'utf-32', 'utf-32-le', 'utf-32-be'): + encodings = ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', + 'utf-32', 'utf-32-le', 'utf-32-be') + for encoding in encodings: raises(UnicodeEncodeError, u'\ud800'.encode, encoding) + assert (u'[\udc80]'.encode(encoding, "backslashreplace") 
== + '[\\udc80]'.encode(encoding)) + assert (u'[\udc80]'.encode(encoding, "ignore") == + '[]'.encode(encoding)) + assert (u'[\udc80]'.encode(encoding, "replace") == + '[?]'.encode(encoding)) def test_charmap_encode(self): assert 'xxx'.encode('charmap') == b'xxx' diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py --- a/pypy/module/cpyext/api.py +++ b/pypy/module/cpyext/api.py @@ -582,6 +582,7 @@ 'PyComplex_AsCComplex', 'PyComplex_FromCComplex', 'PyObject_AsReadBuffer', 'PyObject_AsWriteBuffer', 'PyObject_CheckReadBuffer', + 'PyBuffer_GetPointer', 'PyBuffer_ToContiguous', 'PyBuffer_FromContiguous', 'PyImport_ImportModuleLevel', diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py --- a/pypy/module/cpyext/dictobject.py +++ b/pypy/module/cpyext/dictobject.py @@ -274,7 +274,10 @@ if pos == 0: # Store the current keys in the PyDictObject. 
decref(space, py_dict.c__tmpkeys) - w_keys = space.call_method(space.w_dict, "keys", w_dict) + w_keyview = space.call_method(space.w_dict, "keys", w_dict) + # w_keys must use the object strategy in order to keep the keys alive + w_keys = space.newlist(space.listview(w_keyview)) + w_keys.switch_to_object_strategy() py_dict.c__tmpkeys = create_ref(space, w_keys) Py_IncRef(space, py_dict.c__tmpkeys) else: @@ -287,10 +290,10 @@ decref(space, py_dict.c__tmpkeys) py_dict.c__tmpkeys = lltype.nullptr(PyObject.TO) return 0 - w_key = space.listview(w_keys)[pos] + w_key = space.listview(w_keys)[pos] # fast iff w_keys uses object strat w_value = space.getitem(w_dict, w_key) if pkey: - pkey[0] = as_pyobj(space, w_key) + pkey[0] = as_pyobj(space, w_key) if pvalue: pvalue[0] = as_pyobj(space, w_value) return 1 diff --git a/pypy/module/cpyext/include/object.h b/pypy/module/cpyext/include/object.h --- a/pypy/module/cpyext/include/object.h +++ b/pypy/module/cpyext/include/object.h @@ -308,6 +308,31 @@ PyAPI_FUNC(int) PyObject_AsReadBuffer(PyObject *, const void **, Py_ssize_t *); PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *, void **, Py_ssize_t *); PyAPI_FUNC(int) PyObject_CheckReadBuffer(PyObject *); +PyAPI_FUNC(void *) PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices); +/* Get the memory area pointed to by the indices for the buffer given. + Note that view->ndim is the assumed size of indices +*/ + +PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view, + Py_ssize_t len, char fort); +PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf, + Py_ssize_t len, char fort); +/* Copy len bytes of data from the contiguous chunk of memory + pointed to by buf into the buffer exported by obj. Return + 0 on success and return -1 and raise a PyBuffer_Error on + error (i.e. the object does not have a buffer interface or + it is not working). + + If fort is 'F' and the object is multi-dimensional, + then the data will be copied into the array in + Fortran-style (first dimension varies the fastest). If + fort is 'C', then the data will be copied into the array + in C-style (last dimension varies the fastest). If fort + is 'A', then it does not matter and the copy will be made + in whatever way is more efficient. 
+ +*/ + #define PyObject_MALLOC PyObject_Malloc #define PyObject_REALLOC PyObject_Realloc diff --git a/pypy/module/cpyext/pyobject.py b/pypy/module/cpyext/pyobject.py --- a/pypy/module/cpyext/pyobject.py +++ b/pypy/module/cpyext/pyobject.py @@ -15,7 +15,7 @@ from rpython.rlib.objectmodel import keepalive_until_here from rpython.rtyper.annlowlevel import llhelper from rpython.rlib import rawrefcount, jit -from rpython.rlib.debug import fatalerror +from rpython.rlib.debug import ll_assert, fatalerror #________________________________________________________ @@ -243,6 +243,11 @@ py_obj = rawrefcount.from_obj(PyObject, w_obj) if not py_obj: py_obj = create_ref(space, w_obj, w_userdata, immortal=immortal) + # + # Try to crash here, instead of randomly, if we don't keep w_obj alive + ll_assert(py_obj.c_ob_refcnt >= rawrefcount.REFCNT_FROM_PYPY, + "Bug in cpyext: The W_Root object was garbage-collected " + "while being converted to PyObject.") return py_obj else: return lltype.nullptr(PyObject.TO) diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py --- a/pypy/module/cpyext/slotdefs.py +++ b/pypy/module/cpyext/slotdefs.py @@ -13,7 +13,7 @@ ssizessizeargfunc, ssizeobjargproc, iternextfunc, initproc, richcmpfunc, cmpfunc, hashfunc, descrgetfunc, descrsetfunc, objobjproc, objobjargproc, getbufferproc, ssizessizeobjargproc) -from pypy.module.cpyext.pyobject import make_ref, decref, from_ref +from pypy.module.cpyext.pyobject import make_ref, from_ref, as_pyobj from pypy.module.cpyext.pyerrors import PyErr_Occurred from pypy.module.cpyext.memoryobject import fill_Py_buffer from pypy.module.cpyext.state import State @@ -90,20 +90,21 @@ args_w = space.fixedview(w_args) return generic_cpy_call(space, func_binary, w_self, args_w[0]) +def _get_ob_type(space, w_obj): + # please ensure that w_obj stays alive + ob_type = as_pyobj(space, space.type(w_obj)) + return rffi.cast(PyTypeObjectPtr, ob_type) + def wrap_binaryfunc_l(space, w_self, w_args, func): func_binary = rffi.cast(binaryfunc, func) check_num_args(space, w_args, 1) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) return generic_cpy_call(space, func_binary, w_self, args_w[0]) def wrap_binaryfunc_r(space, w_self, w_args, func): func_binary = rffi.cast(binaryfunc, func) check_num_args(space, w_args, 1) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) return generic_cpy_call(space, func_binary, args_w[0], w_self) def wrap_ternaryfunc(space, w_self, w_args, func): @@ -121,8 +122,6 @@ func_ternary = rffi.cast(ternaryfunc, func) check_num_argsv(space, w_args, 1, 2) args_w = space.fixedview(w_args) - ref = make_ref(space, w_self) - decref(space, ref) arg3 = space.w_None if len(args_w) > 1: arg3 = args_w[1] @@ -314,12 +313,10 @@ def wrap_getreadbuffer(space, w_self, w_args, func): func_target = rffi.cast(readbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type + py_type = _get_ob_type(space, w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) - decref(space, py_obj) with lltype.scoped_alloc(rffi.VOIDPP.TO, 1) as ptr: index = rffi.cast(Py_ssize_t, 0) size = generic_cpy_call(space, func_target, w_self, index, ptr) @@ -332,9 +329,7 @@ def wrap_getwritebuffer(space, w_self, w_args, func): func_target = rffi.cast(readbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type - decref(space, py_obj) + py_type = _get_ob_type(space, 
w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) @@ -350,12 +345,10 @@ def wrap_getbuffer(space, w_self, w_args, func): func_target = rffi.cast(getbufferproc, func) - py_obj = make_ref(space, w_self) - py_type = py_obj.c_ob_type + py_type = _get_ob_type(space, w_self) rbp = rffi.cast(rffi.VOIDP, 0) if py_type.c_tp_as_buffer: rbp = rffi.cast(rffi.VOIDP, py_type.c_tp_as_buffer.c_bf_releasebuffer) - decref(space, py_obj) with lltype.scoped_alloc(Py_buffer) as pybuf: _flags = 0 if space.len_w(w_args) > 0: diff --git a/pypy/module/cpyext/src/abstract.c b/pypy/module/cpyext/src/abstract.c --- a/pypy/module/cpyext/src/abstract.c +++ b/pypy/module/cpyext/src/abstract.c @@ -96,6 +96,163 @@ return 0; } +void* +PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices) +{ + char* pointer; + int i; + pointer = (char *)view->buf; + for (i = 0; i < view->ndim; i++) { + pointer += view->strides[i]*indices[i]; + if ((view->suboffsets != NULL) && (view->suboffsets[i] >= 0)) { + pointer = *((char**)pointer) + view->suboffsets[i]; + } + } + return (void*)pointer; +} + +void +_Py_add_one_to_index_F(int nd, Py_ssize_t *index, const Py_ssize_t *shape) +{ + int k; + + for (k=0; k=0; k--) { + if (index[k] < shape[k]-1) { + index[k]++; + break; + } + else { + index[k] = 0; + } + } +} + + /* view is not checked for consistency in either of these. It is + assumed that the size of the buffer is view->len in + view->len / view->itemsize elements. + */ + +int +PyBuffer_ToContiguous(void *buf, Py_buffer *view, Py_ssize_t len, char fort) +{ + int k; + void (*addone)(int, Py_ssize_t *, const Py_ssize_t *); + Py_ssize_t *indices, elements; + char *dest, *ptr; + + if (len > view->len) { + len = view->len; + } + + if (PyBuffer_IsContiguous(view, fort)) { + /* simplest copy is all that is needed */ + memcpy(buf, view->buf, len); + return 0; + } + + /* Otherwise a more elaborate scheme is needed */ + + /* view->ndim <= 64 */ + indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); + if (indices == NULL) { + PyErr_NoMemory(); + return -1; + } + for (k=0; kndim;k++) { + indices[k] = 0; + } + + if (fort == 'F') { + addone = _Py_add_one_to_index_F; + } + else { + addone = _Py_add_one_to_index_C; + } + dest = buf; + /* XXX : This is not going to be the fastest code in the world + several optimizations are possible. 
+ */ + elements = len / view->itemsize; + while (elements--) { + ptr = PyBuffer_GetPointer(view, indices); + memcpy(dest, ptr, view->itemsize); + dest += view->itemsize; + addone(view->ndim, indices, view->shape); + } + PyMem_Free(indices); + return 0; +} + +int +PyBuffer_FromContiguous(Py_buffer *view, void *buf, Py_ssize_t len, char fort) +{ + int k; + void (*addone)(int, Py_ssize_t *, const Py_ssize_t *); + Py_ssize_t *indices, elements; + char *src, *ptr; + + if (len > view->len) { + len = view->len; + } + + if (PyBuffer_IsContiguous(view, fort)) { + /* simplest copy is all that is needed */ + memcpy(view->buf, buf, len); + return 0; + } + + /* Otherwise a more elaborate scheme is needed */ + + /* view->ndim <= 64 */ + indices = (Py_ssize_t *)PyMem_Malloc(sizeof(Py_ssize_t)*(view->ndim)); + if (indices == NULL) { + PyErr_NoMemory(); + return -1; + } + for (k=0; kndim;k++) { + indices[k] = 0; + } + + if (fort == 'F') { + addone = _Py_add_one_to_index_F; + } + else { + addone = _Py_add_one_to_index_C; + } + src = buf; + /* XXX : This is not going to be the fastest code in the world + several optimizations are possible. + */ + elements = len / view->itemsize; + while (elements--) { + ptr = PyBuffer_GetPointer(view, indices); + memcpy(ptr, src, view->itemsize); + src += view->itemsize; + addone(view->ndim, indices, view->shape); + } + + PyMem_Free(indices); + return 0; +} + + + /* Buffer C-API for Python 3.0 */ int diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -137,7 +137,36 @@ view = PyMemoryView_GET_BUFFER(memoryview); Py_DECREF(memoryview); return PyLong_FromLong(view->len / view->itemsize); - """)]) + """), + ("test_contiguous", "METH_O", + """ + Py_buffer* view; + PyObject * memoryview; + void * buf = NULL; + int ret; + Py_ssize_t len; + memoryview = PyMemoryView_FromObject(args); + if (memoryview == NULL) + return NULL; + view = PyMemoryView_GET_BUFFER(memoryview); + Py_DECREF(memoryview); + len = view->len; + if (len == 0) + return NULL; + buf = malloc(len); + ret = PyBuffer_ToContiguous(buf, view, view->len, 'A'); + if (ret != 0) + { + free(buf); + return NULL; + } + ret = PyBuffer_FromContiguous(view, buf, view->len, 'A'); + free(buf); + if (ret != 0) + return NULL; + Py_RETURN_NONE; + """), + ]) module = self.import_module(name='buffer_test') arr = module.PyMyArray(10) ten = foo.get_len(arr) @@ -146,6 +175,7 @@ assert ten == 10 ten = foo.test_buffer(arr) assert ten == 10 + foo.test_contiguous(arr) def test_releasebuffer(self): module = self.import_extension('foo', [ diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -337,12 +337,8 @@ PyObject* name = PyBytes_FromString("mymodule"); PyObject *obj = PyType_Type.tp_alloc(&PyType_Type, 0); PyHeapTypeObject *type = (PyHeapTypeObject*)obj; - if ((type->ht_type.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0) - { - PyErr_SetString(PyExc_ValueError, - "Py_TPFLAGS_HEAPTYPE not set"); - return NULL; - } + /* this is issue #2434: logic from pybind11 */ + type->ht_type.tp_flags |= Py_TPFLAGS_HEAPTYPE; type->ht_type.tp_name = ((PyTypeObject*)args)->tp_name; PyType_Ready(&type->ht_type); ret = PyObject_SetAttrString((PyObject*)&type->ht_type, diff --git a/pypy/module/imp/test/test_import.py b/pypy/module/imp/test/test_import.py --- 
a/pypy/module/imp/test/test_import.py +++ b/pypy/module/imp/test/test_import.py @@ -601,25 +601,15 @@ import pkg.a, imp imp.reload(pkg.a) - def test_reload_builtin(self): - import sys, imp - oldpath = sys.path - try: - del sys.settrace - except AttributeError: - pass - - imp.reload(sys) - - assert sys.path is oldpath - assert 'settrace' not in dir(sys) # at least on CPython 3.5.2 - def test_reload_builtin_doesnt_clear(self): import imp import sys sys.foobar = "baz" - imp.reload(sys) - assert sys.foobar == "baz" + try: + imp.reload(sys) + assert sys.foobar == "baz" + finally: + del sys.foobar def test_reimport_builtin_simple_case_1(self): import sys, time @@ -637,18 +627,18 @@ def test_reimport_builtin(self): import imp, sys, time - oldpath = sys.path - time.tzname = "" + old_sleep = time.sleep + time.sleep = "" del sys.modules['time'] import time as time1 assert sys.modules['time'] is time1 - assert time.tzname == "" + assert time.sleep == "" - imp.reload(time1) # don't leave a broken time.tzname behind + imp.reload(time1) # don't leave a broken time.sleep behind import time - assert time.tzname != "" + assert time.sleep is old_sleep def test_reload_infinite(self): import infinite_reload diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -243,6 +243,9 @@ if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) + if hasattr(rposix, 'sched_yield'): + interpleveldefs['sched_yield'] = 'interp_posix.sched_yield' + for _name in ["O_CLOEXEC"]: if getattr(rposix, _name) is not None: interpleveldefs[_name] = 'space.wrap(%d)' % getattr(rposix, _name) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -2481,3 +2481,14 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(s) + +def sched_yield(space): + """ Voluntarily relinquish the CPU""" + while True: + try: + res = rposix.sched_yield() + except OSError as e: + wrap_oserror(space, e, eintr_retry=True) + else: + return space.newint(res) +>>>>>>> other diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -975,6 +975,12 @@ assert isinstance(high, int) == True assert high > low + if hasattr(rposix, 'sched_yield'): + def test_sched_yield(self): + os = self.posix + #Always suceeds on Linux + os.sched_yield() + def test_write_buffer(self): os = self.posix fd = os.open(self.path2 + 'test_write_buffer', diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -811,7 +811,7 @@ w_encoding) if space.is_none(w_namespace_separator): - namespace_separator = 0 + namespace_separator = -1 elif space.isinstance_w(w_namespace_separator, space.w_text): separator = space.text_w(w_namespace_separator) if len(separator) == 0: @@ -834,7 +834,7 @@ elif space.is_w(w_intern, space.w_None): w_intern = None - if namespace_separator: + if namespace_separator >= 0: xmlparser = XML_ParserCreateNS( encoding, rffi.cast(rffi.CHAR, namespace_separator)) diff --git a/pypy/module/pyexpat/test/test_parser.py b/pypy/module/pyexpat/test/test_parser.py --- a/pypy/module/pyexpat/test/test_parser.py +++ b/pypy/module/pyexpat/test/test_parser.py @@ -58,9 +58,9 @@ 
p.CharacterDataHandler = lambda s: data.append(s) encoding = encoding_arg is None and 'utf-8' or encoding_arg - res = p.Parse("\u00f6".encode(encoding), True) + res = p.Parse(u"\u00f6".encode(encoding), True) assert res == 1 - assert data == ["\u00f6"] + assert data == [u"\u00f6"] def test_get_handler(self): import pyexpat @@ -210,6 +210,34 @@ p.ParseFile(fake_reader) assert fake_reader.read_count == 4 + def test_entities(self): + import pyexpat + parser = pyexpat.ParserCreate(None, "") + + def startElement(tag, attrs): + assert tag == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF' + assert attrs == { + 'http://www.w3.org/XML/1998/namespacebase': + 'http://www.semanticweb.org/jiba/ontologies/2017/0/test'} + parser.StartElementHandler = startElement + parser.Parse(""" + + + + + + ]> + + + + """, True) + def test_exception(self): """ diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi0/test_version.py @@ -37,7 +37,7 @@ v = cffi.__version__.replace('+', '') p = os.path.join(parent, 'doc', 'source', 'installation.rst') content = open(p).read() - assert ("/cffi-%s.tar.gz" % v) in content + assert (" package version %s:" % v) in content def test_setup_version(): parent = os.path.dirname(os.path.dirname(cffi.__file__)) diff --git a/pypy/module/test_lib_pypy/test_sqlite3.py b/pypy/module/test_lib_pypy/test_sqlite3.py --- a/pypy/module/test_lib_pypy/test_sqlite3.py +++ b/pypy/module/test_lib_pypy/test_sqlite3.py @@ -228,6 +228,14 @@ cur.execute("create table test(a)") cur.executemany("insert into test values (?)", [[1], [2], [3]]) assert cur.lastrowid is None + # issue 2682 + cur.execute('''insert + into test + values (?) + ''', (1, )) + assert cur.lastrowid is not None + cur.execute('''insert\t into test values (?) 
''', (1, )) + assert cur.lastrowid is not None def test_authorizer_bad_value(self, con): def authorizer_cb(action, arg1, arg2, dbname, source): diff --git a/pypy/module/unicodedata/test/test_hyp.py b/pypy/module/unicodedata/test/test_hyp.py --- a/pypy/module/unicodedata/test/test_hyp.py +++ b/pypy/module/unicodedata/test/test_hyp.py @@ -40,6 +40,7 @@ @pytest.mark.parametrize('NF1, NF2, NF3', compositions) @example(s=u'---\uafb8\u11a7---') # issue 2289 + at example(s=u'\ufacf') @settings(max_examples=1000) @given(s=st.text()) def test_composition(s, space, NF1, NF2, NF3): diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -1,4 +1,7 @@ # coding: utf-8 + +from pypy.interpreter.error import OperationError + class TestW_BytesObject: def teardown_method(self, method): @@ -96,6 +99,78 @@ w_res = space.call_function(space.w_bytes, space.wrap([42])) assert space.str_w(w_res) == '*' + +try: + from hypothesis import given, strategies +except ImportError: + pass +else: + @given(u=strategies.binary(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(u, start, len1, space): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == expected + + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 + + w_index = space.call_method(w_u, 'rfind', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == rexpected + + expected = u.startswith(v, start) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.startswith(v, start, start + len1) + w_res = space.call_method(w_u, 'startswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start)) + assert w_res is space.newbool(expected) + + expected = u.endswith(v, start, start + len1) + w_res = space.call_method(w_u, 'endswith', w_v, + space.newint(start), + space.newint(start + len1)) + assert w_res is space.newbool(expected) + + class AppTestBytesObject: def setup_class(cls): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -1,6 +1,7 @@ # -*- encoding: utf-8 -*- import py import sys +from pypy.interpreter.error import OperationError class TestUnicodeObject: @@ -38,6 +39,55 @@ space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) 
+try: + from hypothesis import given, strategies +except ImportError: + pass +else: + @given(u=strategies.text(), + start=strategies.integers(min_value=0, max_value=10), + len1=strategies.integers(min_value=-1, max_value=10)) + def test_hypo_index_find(u, start, len1, space): + if start + len1 < 0: + return # skip this case + v = u[start : start + len1] + w_u = space.wrap(u) + w_v = space.wrap(v) + expected = u.find(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'index', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert expected == -1 + else: + assert space.int_w(w_index) == expected >= 0 + + w_index = space.call_method(w_u, 'find', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == expected + + rexpected = u.rfind(v, start, start + len1) + try: + w_index = space.call_method(w_u, 'rindex', w_v, + space.newint(start), + space.newint(start + len1)) + except OperationError as e: + if not e.match(space, space.w_ValueError): + raise + assert rexpected == -1 + else: + assert space.int_w(w_index) == rexpected >= 0 + + w_index = space.call_method(w_u, 'rfind', w_v, + space.newint(start), + space.newint(start + len1)) + assert space.int_w(w_index) == rexpected + + class AppTestUnicodeStringStdOnly: def test_compares(self): assert type('a') != type(b'a') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -748,17 +748,6 @@ interpreted as in slice notation. """ - def decode(): - """S.decode(encoding=None, errors='strict') -> string or unicode - - Decode S using the codec registered for encoding. encoding defaults - to the default encoding. errors may be given to set a different error - handling scheme. Default is 'strict' meaning that encoding errors raise - a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' - as well as any other name registered with codecs.register_error that is - able to handle UnicodeDecodeErrors. - """ - def encode(): """S.encode(encoding=None, errors='strict') -> string or unicode diff --git a/pypy/tool/pytest/apptest.py b/pypy/tool/pytest/apptest.py --- a/pypy/tool/pytest/apptest.py +++ b/pypy/tool/pytest/apptest.py @@ -12,7 +12,7 @@ from pypy.interpreter.gateway import app2interp_temp from pypy.interpreter.error import OperationError from pypy.interpreter.function import Method -from rpython.tool import runsubprocess +from rpython.tool.runsubprocess import run_subprocess from pypy.tool.pytest import appsupport from pypy.tool.pytest.objspace import gettestobjspace from rpython.tool.udir import udir @@ -67,14 +67,10 @@ def _rename_module(name): return str(RENAMED_USEMODULES.get(name, name)) - -def run_with_python(python_, target_, usemodules, **definitions): - if python_ is None: - py.test.skip("Cannot find the default python3 interpreter to run with -A") - # we assume that the source of target_ is in utf-8. Unfortunately, we don't - # have any easy/standard way to determine from here the original encoding - # of the source file - helpers = r"""# -*- encoding: utf-8 -*- +# we assume that the source of target_ is in utf-8. 
Unfortunately, we don't +# have any easy/standard way to determine from here the original encoding +# of the source file +helpers = r"""# -*- encoding: utf-8 -*- if 1: import sys sys.path.append(%r) @@ -90,7 +86,7 @@ import os try: if isinstance(func, str): - if func.startswith((' ', os.linesep)): + if func.startswith((' ', os.linesep, '\n')): # it's probably an indented block, so we prefix if True: # to avoid SyntaxError func = "if True:\n" + func @@ -109,6 +105,10 @@ pass self = Test() """ + +def run_with_python(python_, target_, usemodules, **definitions): + if python_ is None: + py.test.skip("Cannot find the default python3 interpreter to run with -A") defs = [] for symbol, value in sorted(definitions.items()): if isinstance(value, tuple) and isinstance(value[0], py.code.Source): @@ -181,7 +181,7 @@ helper_dir = os.path.join(pypydir, 'tool', 'cpyext') env = os.environ.copy() env['PYTHONPATH'] = helper_dir - res, stdout, stderr = runsubprocess.run_subprocess( + res, stdout, stderr = run_subprocess( python_, [str(pyfile)], env=env) print pyfile.read() print >> sys.stdout, stdout diff --git a/pypy/tool/pytest/objspace.py b/pypy/tool/pytest/objspace.py --- a/pypy/tool/pytest/objspace.py +++ b/pypy/tool/pytest/objspace.py @@ -57,9 +57,6 @@ if not ok: py.test.skip("cannot runappdirect test: " "module %r required" % (modname,)) - else: - if '__pypy__' in value: - py.test.skip("no module __pypy__ on top of CPython") continue if info is None: py.test.skip("cannot runappdirect this test on top of CPython") diff --git a/rpython/doc/jit/optimizer.rst b/rpython/doc/jit/optimizer.rst --- a/rpython/doc/jit/optimizer.rst +++ b/rpython/doc/jit/optimizer.rst @@ -42,10 +42,9 @@ There are better ways to compute the sum from ``[0..100]``, but it gives a better intuition on how traces are constructed than ``sum(range(101))``. Note that the trace syntax is the one used in the test suite. It is also very -similar to traces printed at runtime by PYPYLOG_. The first line gives the input variables, the -second line is a ``label`` operation, the last one is the backwards ``jump`` operation. - -.. _PYPYLOG: logging.html +similar to traces printed at runtime by :doc:`PYPYLOG <../logging>`. The first +line gives the input variables, the second line is a ``label`` operation, the +last one is the backwards ``jump`` operation. These instructions mentioned earlier are special: diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -305,6 +305,10 @@ # Transform into INT_ADD. The following guard will be killed # by optimize_GUARD_NO_OVERFLOW; if we see instead an # optimize_GUARD_OVERFLOW, then InvalidLoop. + + # NB: this case also takes care of int_add_ovf with 0 as on of the + # arguments: the result will be bounded, and then the optimization + # for int_add with 0 as argument will remove the op. 
op = self.replace_op_with(op, rop.INT_ADD) return self.emit(op) @@ -325,6 +329,7 @@ return None resbound = b0.sub_bound(b1) if resbound.bounded(): + # this case takes care of int_sub_ovf(x, 0) as well op = self.replace_op_with(op, rop.INT_SUB) return self.emit(op) @@ -342,6 +347,7 @@ b2 = self.getintbound(op.getarg(1)) resbound = b1.mul_bound(b2) if resbound.bounded(): + # this case also takes care of multiplication with 0 and 1 op = self.replace_op_with(op, rop.INT_MUL) return self.emit(op) diff --git a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py --- a/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_optimizebasic.py @@ -1962,6 +1962,55 @@ """ self.optimize_loop(ops, expected) + ops = """ + [i0] + i1 = int_mul_ovf(0, i0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(i0, 0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(1, i0) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(i0) + """ + self.optimize_loop(ops, expected) + + ops = """ + [i0] + i1 = int_mul_ovf(i0, 1) + guard_no_overflow() [] + jump(i1) + """ + expected = """ + [i0] + jump(i0) + """ + self.optimize_loop(ops, expected) + + def test_fold_constant_partial_ops_float(self): ops = """ [f0] diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1863,6 +1863,8 @@ rffi.INT, save_err=rffi.RFFI_FULL_ERRNO_ZERO) c_sched_get_priority_min = external('sched_get_priority_min', [rffi.INT], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO) + if not _WIN32: + c_sched_yield = external('sched_yield', [], rffi.INT) @enforceargs(int) def sched_get_priority_max(policy): @@ -1872,9 +1874,9 @@ def sched_get_priority_min(policy): return handle_posix_error('sched_get_priority_min', c_sched_get_priority_min(policy)) - - - + def sched_yield(): + return handle_posix_error('sched_yield', c_sched_yield()) + #___________________________________________________________________ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py --- a/rpython/rlib/runicode.py +++ b/rpython/rlib/runicode.py @@ -877,32 +877,31 @@ ch = ord(s[pos]) pos += 1 ch2 = 0 - if 0xD800 <= ch < 0xDC00: - if not allow_surrogates: - ru, rs, pos = errorhandler(errors, public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - if rs is not None: - # py3k only - if len(rs) % 4 != 0: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - result.append(rs) - continue - for ch in ru: - if ord(ch) < 0xD800: - _STORECHAR32(result, ord(ch), byteorder) - else: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, rs, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + result.append(rs) continue - elif MAXUNICODE < 65536 and pos < size: - ch2 = ord(s[pos]) - if 0xDC00 <= ch2 < 0xE000: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; - pos += 1 + for ch 
in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size: + ch2 = ord(s[pos]) + if 0xDC00 <= ch2 < 0xE000: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; + pos += 1 _STORECHAR32(result, ch, byteorder) return result.build() diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -810,6 +810,11 @@ assert isinstance(low, int) == True assert isinstance(high, int) == True assert high > low + + at rposix_requires('sched_yield') +def test_sched_yield(): + if sys.platform != 'win32': + rposix.sched_yield() @rposix_requires('lockf') def test_os_lockf(): diff --git a/rpython/rlib/test/test_runicode.py b/rpython/rlib/test/test_runicode.py --- a/rpython/rlib/test/test_runicode.py +++ b/rpython/rlib/test/test_runicode.py @@ -2,6 +2,7 @@ import py import sys, random +import struct from rpython.rlib import runicode from hypothesis import given, settings, strategies @@ -266,11 +267,12 @@ assert replace_with(u'rep', None) == '\x00<\x00r\x00e\x00p\x00>' assert replace_with(None, '\xca\xfe') == '\x00<\xca\xfe\x00>' - def test_utf32_surrogates(self): + @py.test.mark.parametrize('unich',[u"\ud800", u"\udc80"]) + def test_utf32_surrogates(self, unich): assert runicode.unicode_encode_utf_32_be( - u"\ud800", 1, None) == '\x00\x00\xd8\x00' + unich, 1, None) == struct.pack('>i', ord(unich)) py.test.raises(UnicodeEncodeError, runicode.unicode_encode_utf_32_be, - u"\ud800", 1, None, allow_surrogates=False) + unich, 1, None, allow_surrogates=False) def replace_with(ru, rs): def errorhandler(errors, enc, msg, u, startingpos, endingpos): if errors == 'strict': @@ -278,7 +280,7 @@ endingpos, msg) return ru, rs, endingpos return runicode.unicode_encode_utf_32_be( - u"<\ud800>", 3, None, + u"<%s>" % unich, 3, None, errorhandler, allow_surrogates=False) assert replace_with(u'rep', None) == u''.encode('utf-32-be') assert replace_with(None, '\xca\xfe\xca\xfe') == '\x00\x00\x00<\xca\xfe\xca\xfe\x00\x00\x00>' @@ -432,7 +434,7 @@ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'ignore', final=True) == (u'aaaabbbb', len(seq) + 8)) assert (self.decoder(seq, len(seq), 'custom', final=True, - errorhandler=self.custom_replace) == + errorhandler=self.custom_replace) == (FOO * len(seq), len(seq))) assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'custom', final=True, errorhandler=self.custom_replace) == @@ -628,7 +630,7 @@ msg='invalid continuation byte') assert self.decoder(seq, len(seq), 'replace', final=True ) == (res, len(seq)) - assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, + assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, 'replace', final=True) == (u'aaaa' + res + u'bbbb', len(seq) + 8)) res = res.replace(FFFD, u'') From pypy.commits at gmail.com Fri Nov 3 12:00:08 2017 From: pypy.commits at gmail.com (nanjekye) Date: Fri, 03 Nov 2017 09:00:08 -0700 (PDT) Subject: [pypy-commit] pypy os_lockf: left over merge conflict Message-ID: <59fc9288.08921c0a.e8509.669f@mx.google.com> Author: Joannah Nanjekye Branch: os_lockf Changeset: r92919:3cb34f15cb82 Date: 2017-10-30 13:09 +0300 http://bitbucket.org/pypy/pypy/changeset/3cb34f15cb82/ Log: left over merge conflict diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ 
b/pypy/module/posix/interp_posix.py @@ -2491,4 +2491,3 @@ wrap_oserror(space, e, eintr_retry=True) else: return space.newint(res) ->>>>>>> other From pypy.commits at gmail.com Fri Nov 3 12:46:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:46:26 -0700 (PDT) Subject: [pypy-commit] buildbot default: Run the extra_tests/ tests in a virtualenv in translated test runs. Message-ID: <59fc9d62.089e1c0a.d6780.7ea3@mx.google.com> Author: Ronan Lamy Branch: Changeset: r1037:0ba20064633a Date: 2017-11-02 16:45 +0000 http://bitbucket.org/pypy/buildbot/changeset/0ba20064633a/ Log: Run the extra_tests/ tests in a virtualenv in translated test runs. diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -282,7 +282,7 @@ # a good idea, passing a "--rev" argument here changes the order of # the checkouts. Then our revisions "12345:432bcbb1ba" are bogus. def _my_pullUpdate(self, res): - command = ['pull' , self.repourl] + command = ['pull', self.repourl] #if self.revision: # command.extend(['--rev', self.revision]) d = self._dovccmd(command) @@ -447,7 +447,7 @@ )) if app_tests: - if app_tests == True: + if app_tests is True: app_tests = [] factory.addStep(PytestCmd( description="app-level (-A) test", @@ -457,6 +457,28 @@ timeout=4000, env={"TMPDIR": Interpolate('%(prop:target_tmpdir)s' + pytest), })) + test_interpreter = '../build/pypy/goal/pypy-c' + factory.addStep(ShellCmd( + description="Create virtualenv", + command=prefix + ['virtualenv', '--clear', '-p', test_interpreter, + 'pypy-venv'], + workdir='venv', + flunkOnFailure=True)) + if platform == 'win32': + virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' + else: + virt_pypy = '../venv/pypy-venv/bin/python' + factory.addStep(ShellCmd( + description="Install extra tests requirements", + command=prefix + [virt_pypy, '-m', 'pip', 'install', + '-r', '../build/extra_tests/requirements.txt'], + workdir='testing')) + factory.addStep(PytestCmd( + description="Run extra tests", + command=prefix + [virt_pypy, '-m', 'pytest', + '../build/extra_tests', '--resultlog=extra.log'], + logfiles={'pytestLog': 'extra.log'}, + workdir='testing')) if lib_python: factory.addStep(PytestCmd( From pypy.commits at gmail.com Fri Nov 3 12:51:48 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:51:48 -0700 (PDT) Subject: [pypy-commit] pypy run-extra-tests: document branch Message-ID: <59fc9ea4.88c5df0a.9d3a1.be03@mx.google.com> Author: Ronan Lamy Branch: run-extra-tests Changeset: r92921:f81b135f1265 Date: 2017-11-03 16:49 +0000 http://bitbucket.org/pypy/pypy/changeset/f81b135f1265/ Log: document branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -17,3 +17,6 @@ .. branch: bsd-patches Fix failures on FreeBSD, contributed by David Naylor as patches on the issue tracker (issues 2694, 2695, 2696, 2697) + +.. 
branch: run-extra-tests +Run extra_tests/ in buildbot From pypy.commits at gmail.com Fri Nov 3 12:51:50 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 09:51:50 -0700 (PDT) Subject: [pypy-commit] pypy default: merge branch run-extra-tests Message-ID: <59fc9ea6.177c1c0a.b5fcf.9ae6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92922:a88ed18e1a6a Date: 2017-11-03 16:51 +0000 http://bitbucket.org/pypy/pypy/changeset/a88ed18e1a6a/ Log: merge branch run-extra-tests diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_failing.py @@ -0,0 +1,8 @@ +from hypothesis import given, strategies + +def mean(a, b): + return (a + b)/2. + + at given(strategies.integers(), strategies.integers()) +def test_mean_failing(a, b): + assert mean(a, b) >= min(a, b) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -17,3 +17,6 @@ .. branch: bsd-patches Fix failures on FreeBSD, contributed by David Naylor as patches on the issue tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot From pypy.commits at gmail.com Fri Nov 3 15:13:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 03 Nov 2017 12:13:39 -0700 (PDT) Subject: [pypy-commit] pypy default: Kill fake test Message-ID: <59fcbfe3.69a8df0a.fef90.b31d@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92923:d1cd247b10f6 Date: 2017-11-03 19:13 +0000 http://bitbucket.org/pypy/pypy/changeset/d1cd247b10f6/ Log: Kill fake test diff --git a/extra_tests/test_failing.py b/extra_tests/test_failing.py deleted file mode 100644 --- a/extra_tests/test_failing.py +++ /dev/null @@ -1,8 +0,0 @@ -from hypothesis import given, strategies - -def mean(a, b): - return (a + b)/2. - - at given(strategies.integers(), strategies.integers()) -def test_mean_failing(a, b): - assert mean(a, b) >= min(a, b) From pypy.commits at gmail.com Sat Nov 4 07:10:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:07 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: a branch where to update the code to vmprof 0.4.10 Message-ID: <59fda00f.6293df0a.4e1d5.0bdb@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92924:4d73e43ae3fb Date: 2017-11-04 11:15 +0100 http://bitbucket.org/pypy/pypy/changeset/4d73e43ae3fb/ Log: a branch where to update the code to vmprof 0.4.10 From pypy.commits at gmail.com Sat Nov 4 07:10:11 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:11 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: I claim that tests should NEVER fail silently; I think that test_native actually fails on linux, but the builtbot never noticed because vmprof is not installed. Probably this test will start failing because of missing vmprof, we'll think of a fix later Message-ID: <59fda013.0a0b1c0a.300cd.d940@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92926:526d9b94882a Date: 2017-11-04 12:09 +0100 http://bitbucket.org/pypy/pypy/changeset/526d9b94882a/ Log: I claim that tests should NEVER fail silently; I think that test_native actually fails on linux, but the builtbot never noticed because vmprof is not installed. 
Probably this test will start failing because of missing vmprof, we'll think of a fix later diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -141,10 +141,6 @@ fn = compile(f, [], gcpolicy="minimark") assert fn() == 0 try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: check_profile(tmpfilename) finally: assert os.path.exists(tmpfilename) @@ -231,10 +227,6 @@ fn = compile(f, [], gcpolicy="incminimark", lldebug=True) assert fn() == 0 try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: check_profile(tmpfilename) finally: assert os.path.exists(tmpfilename) From pypy.commits at gmail.com Sat Nov 4 07:10:09 2017 From: pypy.commits at gmail.com (antocuni) Date: Sat, 04 Nov 2017 04:10:09 -0700 (PDT) Subject: [pypy-commit] pypy vmprof-0.4.10: bah, I think that this test did not actually test anything because on buildbot the cwd was different that the test expects, and thus os.walk returned an empty list O_o. Make it more robust, and actually check all files instead of stopping at the first one Message-ID: <59fda011.1cbf1c0a.fbe1b.7909@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92925:0317d4f69638 Date: 2017-11-04 12:03 +0100 http://bitbucket.org/pypy/pypy/changeset/0317d4f69638/ Log: bah, I think that this test did not actually test anything because on buildbot the cwd was different that the test expects, and thus os.walk returned an empty list O_o. Make it more robust, and actually check all files instead of stopping at the first one diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,6 +2,7 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( @@ -10,17 +11,26 @@ def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = shared.listdir('*.[ch]') + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + url = github_raw_file("vmprof/vmprof-python", "src/%s" % file.basename) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") From pypy.commits at gmail.com Sat Nov 4 14:08:04 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 04 Nov 2017 11:08:04 -0700 (PDT) Subject: [pypy-commit] pypy default: graft parts of 287c9946859b that provide rposix.lockf in rpython Message-ID: <59fe0204.90b2df0a.de390.f0dc@mx.google.com> Author: Matti Picus Branch: Changeset: r92928:9d22ff3be2ae Date: 2017-11-04 20:07 +0200 http://bitbucket.org/pypy/pypy/changeset/9d22ff3be2ae/ Log: graft parts of 287c9946859b that provide rposix.lockf in rpython diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -276,6 +276,10 @@ SCHED_OTHER = rffi_platform.DefinedConstantInteger('SCHED_OTHER') SCHED_BATCH = rffi_platform.DefinedConstantInteger('SCHED_BATCH') O_NONBLOCK = rffi_platform.DefinedConstantInteger('O_NONBLOCK') + F_LOCK = rffi_platform.DefinedConstantInteger('F_LOCK') + F_TLOCK = rffi_platform.DefinedConstantInteger('F_TLOCK') + F_ULOCK = rffi_platform.DefinedConstantInteger('F_ULOCK') + F_TEST = rffi_platform.DefinedConstantInteger('F_TEST') OFF_T = rffi_platform.SimpleType('off_t') OFF_T_SIZE = rffi_platform.SizeOf('off_t') @@ -548,6 +552,14 @@ if error != 0: raise OSError(error, 'posix_fadvise failed') + c_lockf = external('lockf', + [rffi.INT, rffi.INT , OFF_T], rffi.INT, + save_err=rffi.RFFI_SAVE_ERRNO) + @enforceargs(int, None, None) + def lockf(fd, cmd, length): + validate_fd(fd) + return handle_posix_error('lockf', c_lockf(fd, cmd, length)) + c_ftruncate = external('ftruncate', [rffi.INT, rffi.LONGLONG], rffi.INT, macro=_MACRO_ON_POSIX, save_err=rffi.RFFI_SAVE_ERRNO) c_fsync = external('fsync' if not _WIN32 else '_commit', [rffi.INT], rffi.INT, diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py --- a/rpython/rlib/test/test_rposix.py +++ b/rpython/rlib/test/test_rposix.py @@ -816,3 +816,14 @@ if sys.platform != 'win32': rposix.sched_yield() + at rposix_requires('lockf') +def test_os_lockf(): + fname = str(udir.join('os_test.txt')) + fd = os.open(fname, os.O_WRONLY | os.O_CREAT, 0777) + try: + os.write(fd, b'test') + os.lseek(fd, 0, 0) + rposix.lockf(fd, rposix.F_LOCK, 4) + rposix.lockf(fd, rposix.F_ULOCK, 4) + finally: + os.close(fd) From pypy.commits at gmail.com Sat Nov 4 14:19:22 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 04 Nov 2017 11:19:22 -0700 (PDT) Subject: [pypy-commit] pypy math-improvements: Make rshift invert (in most cases) in place, this makes a huge speedup for rshift with negative numbers as it avoids two extra copies, also make an rqshift for the power of twos Message-ID: <59fe04aa.4a9fdf0a.b3328.8a76@mx.google.com> Author: stian Branch: math-improvements Changeset: r92929:f30c2f38b0b5 Date: 2017-11-04 19:18 +0100 http://bitbucket.org/pypy/pypy/changeset/f30c2f38b0b5/ Log: Make rshift invert (in most cases) in place, this makes a huge speedup for rshift with negative numbers as it avoids two extra copies, also make an rqshift for the power of twos 
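A note for readers of the change that follows: the sketch below is an editorial illustration in plain Python, not the rbigint code itself, and the helper names are invented for the example. It shows the arithmetic identity the old rshift() relied on for negative values (invert, shift, invert again, which materializes two extra bignum copies) and an equivalent formulation that folds the +/-1 adjustment into the shift, which is roughly what the in-place version in the diff aims for.

    # Illustration only (plain Python ints, not the rbigint digit layout).
    # For any integer a and shift n >= 0, an arithmetic right shift of a
    # negative number satisfies:
    #
    #     a >> n == ~((~a) >> n)
    #
    # so it can be computed either through two inverted temporaries, or by
    # adjusting the magnitude directly: |a >> n| == ((|a| - 1) >> n) + 1.

    def rshift_via_invert(a, n):
        # the old strategy: builds ~a and then inverts the shifted result
        return ~((~a) >> n)

    def rshift_folded(a, n):
        # equivalent result for a < 0 without materializing ~a
        assert a < 0 and n >= 0
        return -((((-a) - 1) >> n) + 1)

    for a in (-1, -7, -12345678901234567890):
        for n in (0, 1, 5, 64):
            assert rshift_via_invert(a, n) == (a >> n) == rshift_folded(a, n)
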
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -787,7 +787,7 @@ if digit == 1: return rbigint(self._digits[:self.numdigits()], 1, self.numdigits()) elif digit and digit & (digit - 1) == 0: - return self.rshift(ptwotable[digit]) + return self.rqshift(ptwotable[digit]) div, mod = _divrem(self, other) if mod.sign * other.sign == -1: @@ -816,7 +816,7 @@ if digit == 1: return self elif digit & (digit - 1) == 0: - return self.rshift(ptwotable[digit]) + return self.rqshift(ptwotable[digit]) div, mod = _divrem1(self, digit) @@ -1267,31 +1267,85 @@ raise ValueError("negative shift count") elif int_other == 0: return self + invert = False if self.sign == -1 and not dont_invert: - a = self.invert().rshift(int_other) - return a.invert() + first = self.digit(0) + if first == 0: + a = self.invert().rshift(int_other) + return a.invert() + invert = True wordshift = int_other / SHIFT + loshift = int_other % SHIFT newsize = self.numdigits() - wordshift if newsize <= 0: - return NULLRBIGINT - - loshift = int_other % SHIFT + if invert: + return ONENEGATIVERBIGINT + else: + return NULLRBIGINT + + hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 while i < newsize: - newdigit = (self.udigit(wordshift) >> loshift) + digit = self.udigit(wordshift) + if i == 0 and invert and wordshift == 0: + digit -= 1 + newdigit = (digit >> loshift) if i+1 < newsize: newdigit |= (self.udigit(wordshift+1) << hishift) z.setdigit(i, newdigit) i += 1 wordshift += 1 + if invert: + z.setdigit(0, z.digit(0)+1) z._normalize() return z rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. @jit.elidable + def rqshift(self, int_other): + wordshift = int_other / SHIFT + loshift = int_other % SHIFT + newsize = self.numdigits() - wordshift + + invert = False + if self.sign == -1: + first = self.digit(0) + if first == 0: + a = self.invert().rqshift(int_other) + return a.invert() + invert = True + + if newsize <= 0: + if invert: + return ONENEGATIVERBIGINT + else: + return NULLRBIGINT + + + hishift = SHIFT - loshift + z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) + i = 0 + inverted = False + while i < newsize: + digit = self.udigit(wordshift) + if invert and i == 0 and wordshift == 0: + digit -= 1 + newdigit = (digit >> loshift) + if i+1 < newsize: + newdigit |= (self.udigit(wordshift+1) << hishift) + z.setdigit(i, newdigit) + i += 1 + wordshift += 1 + if invert: + z.setdigit(0, z.digit(0)+1) + z._normalize() + return z + rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. 
+ + @jit.elidable def abs_rshift_and_mask(self, bigshiftcount, mask): assert isinstance(bigshiftcount, r_ulonglong) assert mask >= 0 diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -598,6 +598,33 @@ res3 = f1.abs_rshift_and_mask(r_ulonglong(y), mask) assert res3 == (abs(x) >> y) & mask + def test_qshift(self): + for x in range(10): + for y in range(1, 161, 16): + num = (x << y) + x + f1 = rbigint.fromlong(num) + nf1 = rbigint.fromlong(-num) + + for z in range(1, 31): + res1 = f1.lqshift(z).tolong() + res2 = f1.rqshift(z).tolong() + res3 = nf1.lqshift(z).tolong() + res4 = nf1.rqshift(z).tolong() + + assert res1 == num << z + assert res2 == num >> z + assert res3 == -num << z + assert res4 == -num >> z + + # Large digit + for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): + f1 = rbigint.fromlong(x) + nf1 = rbigint.fromlong(-x) + assert f1.rqshift(SHIFT).tolong() == x >> SHIFT + assert nf1.rqshift(SHIFT).tolong() == -x >> SHIFT + assert f1.rqshift(SHIFT+1).tolong() == x >> (SHIFT+1) + assert nf1.rqshift(SHIFT+1).tolong() == -x >> (SHIFT+1) + def test_from_list_n_bits(self): for x in ([3L ** 30L, 5L ** 20L, 7 ** 300] + [1L << i for i in range(130)] + From pypy.commits at gmail.com Sat Nov 4 13:32:37 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 04 Nov 2017 10:32:37 -0700 (PDT) Subject: [pypy-commit] pypy default: whoops in 5c8b7f2cd6b7 Message-ID: <59fdf9b5.90051c0a.ada36.e348@mx.google.com> Author: Matti Picus Branch: Changeset: r92927:1ea57a8b4a91 Date: 2017-11-04 19:31 +0200 http://bitbucket.org/pypy/pypy/changeset/1ea57a8b4a91/ Log: whoops in 5c8b7f2cd6b7 diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -197,7 +197,7 @@ self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, "__import__('xxbad_pyc', globals(), locals(), [])") - assert 'uu' not in sys.modules + assert 'xxbad_pyc' not in sys.modules def test_force_py(self): import sys From pypy.commits at gmail.com Sat Nov 4 16:28:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 13:28:39 -0700 (PDT) Subject: [pypy-commit] pypy default: Add extra-tests for string methods, matching the interp-level tests added in 88bed3bb8ad4 Message-ID: <59fe22f7.4d051c0a.bb8f7.65bd@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92930:b97f900404e5 Date: 2017-11-04 20:28 +0000 http://bitbucket.org/pypy/pypy/changeset/b97f900404e5/ Log: Add extra-tests for string methods, matching the interp-level tests added in 88bed3bb8ad4 diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,82 @@ +from hypothesis import strategies as st +from hypothesis import given, example + + at given(st.binary(), st.binary(), st.binary()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= 
len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.binary(), st.binary()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.binary(), st.binary()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 
0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected From pypy.commits at gmail.com Sat Nov 4 17:14:29 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 14:14:29 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <59fe2db5.4d051c0a.bb8f7.6ffa@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92931:a433b30d93a4 Date: 2017-11-04 21:06 +0000 http://bitbucket.org/pypy/pypy/changeset/a433b30d93a4/ Log: hg merge default diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,82 @@ +from hypothesis import strategies as st +from hypothesis import given, example + + at given(st.binary(), st.binary(), st.binary()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.binary(), st.binary(), st.binary()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert 
s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.binary(), st.binary()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.binary(), st.binary()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st.binary(), st.binary(), st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st.binary(), st.binary(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at 
example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -17,6 +17,12 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,13 @@ .. 
branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy3' '*/bin/libpypy3-c.so'" + binfiles = "'*/bin/pypy3*' '*/bin/libpypy3-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -314,7 +314,7 @@ # ======================================================================== class W_CDLL(W_Root): - def __init__(self, space, name, mode): + def __init__(self, space, name, mode, handle): self.flags = libffi.FUNCFLAG_CDECL self.space = space if name is None: @@ -322,7 +322,7 @@ else: self.name = name try: - self.cdll = libffi.CDLL(name, mode) + self.cdll = libffi.CDLL(name, mode, handle) except DLOpenError as e: raise wrap_dlopenerror(space, e, self.name) except OSError as e: @@ -344,9 +344,9 @@ def getidentifier(self, space): return space.newint(self.cdll.getidentifier()) - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_cdll(space, w_type, name, mode=-1): - return W_CDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_cdll(space, w_type, name, mode=-1, handle=0): + return W_CDLL(space, name, mode, handle) W_CDLL.typedef = TypeDef( @@ -359,13 +359,13 @@ ) class W_WinDLL(W_CDLL): - def __init__(self, space, name, mode): - W_CDLL.__init__(self, space, name, mode) + def __init__(self, space, name, mode, handle): + W_CDLL.__init__(self, space, name, mode, handle) self.flags = libffi.FUNCFLAG_STDCALL - at unwrap_spec(name='fsencode_or_none', mode=int) -def descr_new_windll(space, w_type, name, mode=-1): - return W_WinDLL(space, name, mode) + at unwrap_spec(name='fsencode_or_none', mode=int, handle=int) +def descr_new_windll(space, w_type, name, mode=-1, handle=0): + return W_WinDLL(space, name, mode, handle) W_WinDLL.typedef = TypeDef( @@ -380,4 +380,4 @@ # ======================================================================== def get_libc(space): - return W_CDLL(space, get_libc_name(), -1) + return W_CDLL(space, get_libc_name(), -1, 0) diff --git a/pypy/module/_vmprof/test/test__vmprof.py b/pypy/module/_vmprof/test/test__vmprof.py --- a/pypy/module/_vmprof/test/test__vmprof.py +++ b/pypy/module/_vmprof/test/test__vmprof.py @@ -1,3 +1,4 @@ +import py import sys from rpython.tool.udir import udir from pypy.tool.pytest.objspace import gettestobjspace @@ -110,6 +111,7 @@ _vmprof.disable() assert _vmprof.is_enabled() is False + @py.test.mark.xfail(sys.platform.startswith('freebsd'), reason = "not implemented") def test_get_profile_path(self): import _vmprof tmpfile = open(self.tmpfilename, 'wb') diff --git a/pypy/module/termios/test/test_termios.py b/pypy/module/termios/test/test_termios.py --- a/pypy/module/termios/test/test_termios.py +++ b/pypy/module/termios/test/test_termios.py @@ -7,9 +7,6 @@ if os.name != 
'posix': py.test.skip('termios module only available on unix') -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') - class TestTermios(object): def setup_class(cls): try: diff --git a/pypy/module/test_lib_pypy/pyrepl/__init__.py b/pypy/module/test_lib_pypy/pyrepl/__init__.py --- a/pypy/module/test_lib_pypy/pyrepl/__init__.py +++ b/pypy/module/test_lib_pypy/pyrepl/__init__.py @@ -1,6 +1,3 @@ import sys import lib_pypy.pyrepl sys.modules['pyrepl'] = sys.modules['lib_pypy.pyrepl'] - -if sys.platform.startswith('freebsd'): - raise Exception('XXX seems to hangs on FreeBSD9') diff --git a/pypy/module/test_lib_pypy/pyrepl/test_readline.py b/pypy/module/test_lib_pypy/pyrepl/test_readline.py --- a/pypy/module/test_lib_pypy/pyrepl/test_readline.py +++ b/pypy/module/test_lib_pypy/pyrepl/test_readline.py @@ -4,7 +4,7 @@ @pytest.mark.skipif("os.name != 'posix' or 'darwin' in sys.platform or " - "'kfreebsd' in sys.platform") + "'freebsd' in sys.platform") def test_raw_input(): import os import pty diff --git a/pypy/module/zipimport/test/test_zipimport.py b/pypy/module/zipimport/test/test_zipimport.py --- a/pypy/module/zipimport/test/test_zipimport.py +++ b/pypy/module/zipimport/test/test_zipimport.py @@ -196,19 +196,19 @@ m0 = self.get_pyc()[0] m0 ^= 0x04 test_pyc = bytes([m0]) + self.get_pyc()[1:] - self.writefile("uu.pyc", test_pyc) + self.writefile("xxbad_pyc.pyc", test_pyc) raises(zipimport.ZipImportError, - "__import__('uu', globals(), locals(), [])") - assert 'uu' not in sys.modules + "__import__('xxbad_pyc', globals(), locals(), [])") + assert 'xxbad_pyc' not in sys.modules def test_force_py(self): import sys m0 = self.get_pyc()[0] m0 ^= 0x04 test_pyc = bytes([m0]) + self.get_pyc()[1:] - self.writefile("uu.pyc", test_pyc) - self.writefile("uu.py", "def f(x): return x") - mod = __import__("uu", globals(), locals(), []) + self.writefile("xxforce_py.pyc", test_pyc) + self.writefile("xxforce_py.py", "def f(x): return x") + mod = __import__("xxforce_py", globals(), locals(), []) assert mod.f(3) == 3 def test_sys_modules(self): diff --git a/pypy/tool/cpyext/extbuild.py b/pypy/tool/cpyext/extbuild.py --- a/pypy/tool/cpyext/extbuild.py +++ b/pypy/tool/cpyext/extbuild.py @@ -246,13 +246,13 @@ if sys.platform == 'win32': compile_extra = ["/we4013"] link_extra = ["/LIBPATH:" + os.path.join(sys.exec_prefix, 'libs')] - elif sys.platform == 'darwin': - compile_extra = link_extra = None - pass elif sys.platform.startswith('linux'): compile_extra = [ "-O0", "-g", "-Werror=implicit-function-declaration", "-fPIC"] link_extra = None + else: + compile_extra = link_extra = None + pass return ExtensionCompiler( builddir_base=base_dir, include_extra=[get_python_inc()], diff --git a/pypy/tool/release/package.py b/pypy/tool/release/package.py --- a/pypy/tool/release/package.py +++ b/pypy/tool/release/package.py @@ -224,8 +224,9 @@ old_dir = os.getcwd() try: os.chdir(str(builddir)) - for source, target in binaries: - smartstrip(bindir.join(target), keep_debug=options.keep_debug) + if not _fake: + for source, target in binaries: + smartstrip(bindir.join(target), keep_debug=options.keep_debug) # if USE_ZIPFILE_MODULE: import zipfile diff --git a/pypy/tool/release/smartstrip.py b/pypy/tool/release/smartstrip.py --- a/pypy/tool/release/smartstrip.py +++ b/pypy/tool/release/smartstrip.py @@ -19,6 +19,9 @@ if sys.platform == 'linux2': os.system("objcopy --only-keep-debug %s %s" % (exe, debug)) os.system("objcopy --add-gnu-debuglink=%s %s" % (debug, exe)) + perm = debug.stat().mode + perm 
&= ~(0111) # remove the 'x' bit + debug.chmod(perm) def smartstrip(exe, keep_debug=True): exe = py.path.local(exe) diff --git a/pypy/tool/release/test/test_smartstrip.py b/pypy/tool/release/test/test_smartstrip.py --- a/pypy/tool/release/test/test_smartstrip.py +++ b/pypy/tool/release/test/test_smartstrip.py @@ -42,6 +42,9 @@ smartstrip(exe, keep_debug=True) debug = tmpdir.join("myprog.debug") assert debug.check(file=True) + perm = debug.stat().mode & 0777 + assert perm & 0111 == 0 # 'x' bit not set + # info = info_symbol(exe, "foo") assert info == "foo in section .text of %s" % exe # diff --git a/rpython/rlib/libffi.py b/rpython/rlib/libffi.py --- a/rpython/rlib/libffi.py +++ b/rpython/rlib/libffi.py @@ -434,11 +434,12 @@ # XXX: it partially duplicate the code in clibffi.py class CDLL(object): - def __init__(self, libname, mode=-1): + def __init__(self, libname, mode=-1, lib=0): """Load the library, or raises DLOpenError.""" - self.lib = rffi.cast(DLLHANDLE, 0) - with rffi.scoped_str2charp(libname) as ll_libname: - self.lib = dlopen(ll_libname, mode) + self.lib = rffi.cast(DLLHANDLE, lib) + if lib == 0: + with rffi.scoped_str2charp(libname) as ll_libname: + self.lib = dlopen(ll_libname, mode) def __del__(self): if self.lib: diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -47,7 +47,10 @@ # Guessing a BSD-like Unix platform compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_MAC'] - _libs = [] + if sys.platform.startswith('freebsd'): + _libs = ['unwind'] + else: + _libs = [] eci_kwds = dict( diff --git a/rpython/rtyper/lltypesystem/ll2ctypes.py b/rpython/rtyper/lltypesystem/ll2ctypes.py --- a/rpython/rtyper/lltypesystem/ll2ctypes.py +++ b/rpython/rtyper/lltypesystem/ll2ctypes.py @@ -1147,7 +1147,7 @@ libc_name = get_libc_name() # Make sure the name is determined during import, not at runtime if _FREEBSD: RTLD_DEFAULT = -2 # see - rtld_default_lib = ctypes.CDLL("RTLD_DEFAULT", handle=RTLD_DEFAULT, **load_library_kwargs) + rtld_default_lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT, **load_library_kwargs) # XXX is this always correct??? 
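    # A minimal sketch, not part of the patch, of what handle= buys us on the
    # line above (FreeBSD host assumed): ctypes.CDLL(..., handle=h) wraps an
    # already-open dlopen handle instead of loading a new library, so with the
    # pseudo-handle RTLD_DEFAULT (-2) every lookup becomes a dlsym() on the
    # default, global namespace:
    #
    #     lib = ctypes.CDLL("ld-elf.so.1", handle=RTLD_DEFAULT)
    #     dlopen_ptr = lib['dlopen']   # ~ dlsym(RTLD_DEFAULT, "dlopen")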
standard_c_lib = ctypes.CDLL(libc_name, **load_library_kwargs) @@ -1243,7 +1243,7 @@ if cfunc is None: if _FREEBSD and funcname in ('dlopen', 'fdlopen', 'dlsym', 'dlfunc', 'dlerror', 'dlclose'): - cfunc = get_on_lib(rtld_default_lib, funcname) + cfunc = rtld_default_lib[funcname] else: cfunc = get_on_lib(standard_c_lib, funcname) # XXX magic: on Windows try to load the function from 'kernel32' too From pypy.commits at gmail.com Sat Nov 4 17:14:31 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 04 Nov 2017 14:14:31 -0700 (PDT) Subject: [pypy-commit] pypy py3.5: Kill tests involving dodgy comparisons with CPython 2 and superseded by b97f900404e5 Message-ID: <59fe2db7.4fc7df0a.3c6f8.883d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92932:186f4b89a84a Date: 2017-11-04 21:13 +0000 http://bitbucket.org/pypy/pypy/changeset/186f4b89a84a/ Log: Kill tests involving dodgy comparisons with CPython 2 and superseded by b97f900404e5 diff --git a/pypy/objspace/std/test/test_bytesobject.py b/pypy/objspace/std/test/test_bytesobject.py --- a/pypy/objspace/std/test/test_bytesobject.py +++ b/pypy/objspace/std/test/test_bytesobject.py @@ -100,77 +100,6 @@ assert space.str_w(w_res) == '*' -try: - from hypothesis import given, strategies -except ImportError: - pass -else: - @given(u=strategies.binary(), - start=strategies.integers(min_value=0, max_value=10), - len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - w_u = space.wrap(u) - w_v = space.wrap(v) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 - - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected - - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 - - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected - - expected = u.startswith(v, start) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) - - expected = u.startswith(v, start, start + len1) - w_res = space.call_method(w_u, 'startswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) - - expected = u.endswith(v, start) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start)) - assert w_res is space.newbool(expected) - - expected = u.endswith(v, start, start + len1) - w_res = space.call_method(w_u, 'endswith', w_v, - space.newint(start), - space.newint(start + len1)) - assert w_res is space.newbool(expected) - - class AppTestBytesObject: def setup_class(cls): diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -39,55 +39,6 @@ 
space.raises_w(space.w_UnicodeEncodeError, space.text_w, w_uni) -try: - from hypothesis import given, strategies -except ImportError: - pass -else: - @given(u=strategies.text(), - start=strategies.integers(min_value=0, max_value=10), - len1=strategies.integers(min_value=-1, max_value=10)) - def test_hypo_index_find(u, start, len1, space): - if start + len1 < 0: - return # skip this case - v = u[start : start + len1] - w_u = space.wrap(u) - w_v = space.wrap(v) - expected = u.find(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'index', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert expected == -1 - else: - assert space.int_w(w_index) == expected >= 0 - - w_index = space.call_method(w_u, 'find', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == expected - - rexpected = u.rfind(v, start, start + len1) - try: - w_index = space.call_method(w_u, 'rindex', w_v, - space.newint(start), - space.newint(start + len1)) - except OperationError as e: - if not e.match(space, space.w_ValueError): - raise - assert rexpected == -1 - else: - assert space.int_w(w_index) == rexpected >= 0 - - w_index = space.call_method(w_u, 'rfind', w_v, - space.newint(start), - space.newint(start + len1)) - assert space.int_w(w_index) == rexpected - - class AppTestUnicodeStringStdOnly: def test_compares(self): assert type('a') != type(b'a') From pypy.commits at gmail.com Sat Nov 4 18:16:51 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:51 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: * Return a flag from check_utf8. Message-ID: <59fe3c53.3bb0df0a.1515b.2ac3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92933:a6e6ba074a22 Date: 2017-11-04 10:31 +0100 http://bitbucket.org/pypy/pypy/changeset/a6e6ba074a22/ Log: * Return a flag from check_utf8. * Improve the tests and run it for more examples diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -194,14 +194,14 @@ self.pos = pos def check_ascii(s): - res = _check_ascii(s) + res = first_non_ascii_char(s) if res < 0: return raise CheckError(res) @jit.elidable -def _check_ascii(s): +def first_non_ascii_char(s): for i in range(len(s)): if ord(s[i]) > 0x7F: return i @@ -286,6 +286,9 @@ _invalid_byte_3_of_4 = _invalid_cont_byte _invalid_byte_4_of_4 = _invalid_cont_byte +def _surrogate_bytes(ch1, ch2): + return ch1 == 0xed and ch2 > 0x9f + @enforceargs(allow_surrogates=bool) def _invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates): return (ordch2>>6 != 0x2 or # 0b10 @@ -301,20 +304,22 @@ def check_utf8(s, allow_surrogates, start=0, stop=-1): """Check that 's' is a utf-8-encoded byte string. - Returns the length (number of chars) or raise CheckError. + + Returns the length (number of chars) and flags or raise CheckError. If allow_surrogates is False, then also raise if we see any. Note also codepoints_in_utf8(), which also computes the length faster by assuming that 's' is valid utf-8. 
""" - res = _check_utf8(s, allow_surrogates, start, stop) + res, flags = _check_utf8(s, allow_surrogates, start, stop) if res >= 0: - return res + return res, flags raise CheckError(~res) @jit.elidable def _check_utf8(s, allow_surrogates, start, stop): pos = start continuation_bytes = 0 + flag = FLAG_ASCII if stop < 0: end = len(s) else: @@ -326,38 +331,44 @@ if ordch1 <= 0x7F: continue + if flag == FLAG_ASCII: + flag = FLAG_REGULAR + if ordch1 <= 0xC1: - return ~(pos - 1) + return ~(pos - 1), 0 if ordch1 <= 0xDF: if pos >= end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) pos += 1 if _invalid_byte_2_of_2(ordch2): - return ~(pos - 2) + return ~(pos - 2), 0 # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz continuation_bytes += 1 continue if ordch1 <= 0xEF: if (pos + 2) > end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) pos += 2 if (_invalid_byte_2_of_3(ordch1, ordch2, allow_surrogates) or _invalid_byte_3_of_3(ordch3)): - return ~(pos - 3) + return ~(pos - 3), 0 + + if allow_surrogates and _surrogate_bytes(ordch1, ordch2): + flag = FLAG_HAS_SURROGATES # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz continuation_bytes += 2 continue if ordch1 <= 0xF4: if (pos + 3) > end: - return ~(pos - 1) + return ~(pos - 1), 0 ordch2 = ord(s[pos]) ordch3 = ord(s[pos + 1]) ordch4 = ord(s[pos + 2]) @@ -366,16 +377,16 @@ if (_invalid_byte_2_of_4(ordch1, ordch2) or _invalid_byte_3_of_4(ordch3) or _invalid_byte_4_of_4(ordch4)): - return ~(pos - 4) + return ~(pos - 4), 0 # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz continuation_bytes += 3 continue - return ~(pos - 1) + return ~(pos - 1), 0 assert pos == end assert pos - continuation_bytes >= 0 - return pos - continuation_bytes + return pos - continuation_bytes, flag @jit.elidable def codepoints_in_utf8(value, start=0, end=sys.maxint): @@ -408,9 +419,16 @@ UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct( 'utf8_loc', ('baseindex', lltype.Signed), + ('flag', lltype.Signed), ('ofs', lltype.FixedSizeArray(lltype.Char, 16)) )) +FLAG_REGULAR = 0 +FLAG_HAS_SURROGATES = 1 +FLAG_ASCII = 2 +# note that we never need index storage if we're pure ascii, but it's useful +# for passing into W_UnicodeObject.__init__ + ASCII_INDEX_STORAGE_BLOCKS = 5 ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, ASCII_INDEX_STORAGE_BLOCKS, @@ -423,6 +441,9 @@ def null_storage(): return lltype.nullptr(UTF8_INDEX_STORAGE) +UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) +UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) + def create_utf8_index_storage(utf8, utf8len): """ Create an index storage which stores index of each 4th character in utf8 encoded unicode string. 
diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -28,6 +28,7 @@ else: assert not raised + at settings(max_examples=10000) @given(strategies.binary(), strategies.booleans()) def test_check_utf8(s, allow_surrogates): _test_check_utf8(s, allow_surrogates) @@ -37,19 +38,32 @@ _test_check_utf8(u.encode('utf-8'), allow_surrogates) def _test_check_utf8(s, allow_surrogates): + def _has_surrogates(s): + for u in s.decode('utf8'): + if 0xD800 <= ord(u) <= 0xDB7F: + return True + if 0xDC00 <= ord(u) <= 0xDBFF: + return True + return False + try: u, _ = runicode.str_decode_utf_8(s, len(s), None, final=True, allow_surrogates=allow_surrogates) valid = True except UnicodeDecodeError as e: valid = False - try: - length = rutf8.check_utf8(s, allow_surrogates) - except rutf8.CheckError: + length, flag = rutf8._check_utf8(s, allow_surrogates, 0, len(s)) + if length < 0: assert not valid + assert ~(length) == e.start else: assert valid assert length == len(u) + if flag == rutf8.FLAG_ASCII: + s.decode('ascii') # assert did not raise + elif flag == rutf8.FLAG_HAS_SURROGATES: + assert allow_surrogates + assert _has_surrogates(s) @given(strategies.characters()) def test_next_pos(uni): From pypy.commits at gmail.com Sat Nov 4 18:16:55 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:55 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: finish whacking until the objspace tests pass Message-ID: <59fe3c57.04361c0a.dda17.6b03@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92935:47de95da2bbb Date: 2017-11-04 15:26 +0100 http://bitbucket.org/pypy/pypy/changeset/47de95da2bbb/ Log: finish whacking until the objspace tests pass diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -8,3 +8,4 @@ * better flag handling in split/splitlines maybe? * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode +* encode_error_handler has XXX diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,13 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. 
- def raise_unicode_exception_encode(errors, encoding, msg, w_u, + def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, startingpos, endingpos): + # XXX fix once we stop using runicode.py + flag = _get_flag(u.decode('utf8')) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - w_u, + space.newutf8(u, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,7 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x)) + return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): diff --git a/pypy/objspace/std/test/test_index.py b/pypy/objspace/std/test/test_index.py --- a/pypy/objspace/std/test/test_index.py +++ b/pypy/objspace/std/test/test_index.py @@ -1,5 +1,7 @@ from py.test import raises +from rpython.rlib import rutf8 + class AppTest_IndexProtocol: def setup_class(self): w_oldstyle = self.space.appexec([], """(): @@ -263,7 +265,8 @@ class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase): def setup_method(self, method): SeqTestCase.setup_method(self, method) - self.w_seq = self.space.wrap(u"this is a test") + self.w_seq = self.space.newutf8("this is a test", len("this is a test"), + rutf8.FLAG_ASCII) self.w_const = self.space.appexec([], """(): return unicode""") diff --git a/pypy/objspace/std/test/test_lengthhint.py b/pypy/objspace/std/test/test_lengthhint.py --- a/pypy/objspace/std/test/test_lengthhint.py +++ b/pypy/objspace/std/test/test_lengthhint.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.module._collections.interp_deque import W_Deque from pypy.module.itertools.interp_itertools import W_Repeat @@ -71,7 +74,8 @@ self._test_length_hint(self.space.wrap('P' * self.SIZE)) def test_unicode(self): - self._test_length_hint(self.space.wrap(u'Y' * self.SIZE)) + self._test_length_hint(self.space.newutf8('Y' * self.SIZE, self.SIZE, + rutf8.FLAG_ASCII)) def test_tuple(self): self._test_length_hint(self.space.wrap(tuple(self.ITEMS))) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -22,7 +22,7 @@ BytesListStrategy) #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): diff --git a/pypy/objspace/std/test/test_obj.py b/pypy/objspace/std/test/test_obj.py --- a/pypy/objspace/std/test/test_obj.py +++ b/pypy/objspace/std/test/test_obj.py @@ -17,7 +17,7 @@ cls.w_cpython_apptest = space.wrap(option.runappdirect and not hasattr(sys, 'pypy_translation_info')) def w_unwrap_wrap_unicode(space, w_obj): - return space.newutf8(space.utf8_w(w_obj), w_obj._length) + return space.newutf8(space.utf8_w(w_obj), w_obj._length, w_obj._get_flag()) cls.w_unwrap_wrap_unicode = space.wrap(gateway.interp2app(w_unwrap_wrap_unicode)) def w_unwrap_wrap_str(space, w_obj): return space.wrap(space.str_w(w_obj)) diff --git a/pypy/objspace/std/unicodeobject.py 
b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1834,7 +1834,7 @@ if not isinstance(w_unistr, W_UnicodeObject): raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr) unistr = w_unistr._utf8 - result = ['\0'] * len(unistr) + result = ['\0'] * w_unistr._length digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] i = 0 @@ -1843,6 +1843,8 @@ uchr = rutf8.codepoint_at_pos(unistr, i) if rutf8.isspace(unistr, i): result[res_pos] = ' ' + res_pos += 1 + i = rutf8.next_codepoint_pos(unistr, i) continue try: result[res_pos] = digits[unicodedb.decimal(uchr)] From pypy.commits at gmail.com Sat Nov 4 18:16:57 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:57 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: small fixes, for revisting later once we actually want tests to pass Message-ID: <59fe3c59.4dbbdf0a.8cfd1.b87c@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92936:71debd44669a Date: 2017-11-04 15:31 +0100 http://bitbucket.org/pypy/pypy/changeset/71debd44669a/ Log: small fixes, for revisting later once we actually want tests to pass diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -478,10 +478,10 @@ except rutf8.CheckError as e: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases - xxx res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) - return space.newtuple([space.newutf8(res, lgt), + flag = unicodehelper._get_flag(res.decode("utf8")) + return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: return space.newtuple([space.newutf8(string, lgt, flag), @@ -700,7 +700,8 @@ final, state.decode_error_handler, unicode_name_handler) - return space.newtuple([space.newutf8(result, lgt), space.newint(consumed)]) + flag = unicodehelper._get_flag(result.decode('utf8')) + return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) # ____________________________________________________________ # Unicode-internal diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -74,11 +74,12 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): + flag = 13 raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ space.newtext(name), - space.newutf8(input, inputlen), + space.newutf8(input, inputlen, flag), space.newint(e.start), space.newint(e.end), space.newtext(e.reason)])) diff --git a/pypy/module/pyexpat/interp_pyexpat.py b/pypy/module/pyexpat/interp_pyexpat.py --- a/pypy/module/pyexpat/interp_pyexpat.py +++ b/pypy/module/pyexpat/interp_pyexpat.py @@ -478,8 +478,8 @@ # I suppose this is a valid utf8, but there is noone to check # and noone to catch an error either try: - lgt = rutf8.check_utf8(s, True) - return space.newutf8(s, lgt) + lgt, flag = rutf8.check_utf8(s, True) + return space.newutf8(s, lgt, flag) except rutf8.CheckError: from pypy.interpreter import unicodehelper # get the correct error msg From pypy.commits at gmail.com Sat Nov 4 18:16:59 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:59 -0700 (PDT) Subject: [pypy-commit] 
pypy unicode-utf8: first attempt at fixing the unicode surrogate mess Message-ID: <59fe3c5b.4f931c0a.bc56f.0826@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92937:0c93ee971f62 Date: 2017-11-04 19:07 +0100 http://bitbucket.org/pypy/pypy/changeset/0c93ee971f62/ Log: first attempt at fixing the unicode surrogate mess diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -1,4 +1,3 @@ -* unskip tests in test_unicodeobject.py * rutf8.prev_codepoint_pos should use r_uint * find a better way to run "find" without creating the index storage, if one is not already readily available @@ -9,3 +8,4 @@ * find all the fast-paths that we want to do with utf8 (we only do utf-8 now, not UTF8 or utf8) for decode/encode * encode_error_handler has XXX +* reenable list strategies for ascii-only unicode diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -72,8 +72,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - utf, lgt = unicodehelper.decode_utf8(space, substr) - w_u = space.newutf8(utf, lgt) + utf, (lgt, flag) = unicodehelper.decode_utf8(space, substr) + w_u = space.newutf8(utf, lgt, flag) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -45,14 +45,14 @@ def _has_surrogate(u): for c in u: - if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + if 0xD800 <= ord(c) <= 0xDFFF: return True return False def _get_flag(u): flag = rutf8.FLAG_ASCII for c in u: - if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + if 0xD800 <= ord(c) <= 0xDFFF: return rutf8.FLAG_HAS_SURROGATES if ord(c) >= 0x80: flag = rutf8.FLAG_REGULAR @@ -143,7 +143,7 @@ def str_decode_ascii(s, slen, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, slen, len(s) + return s, slen, len(s), rutf8.FLAG_ASCII except rutf8.CheckError: w = DecodeWrapper((errorhandler)) u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -30,8 +30,8 @@ raise oefmt(space.w_ValueError, "unichr() arg out of range") if code < 0x80: flag = rutf8.FLAG_ASCII - elif 0xDB80 <= code <= 0xCBFF or 0xD800 <= code <= 0xDB7F: - flag = rutf8.FLAG_HAS_SURROGATE + elif 0xD800 <= code <= 0xDFFF: + flag = rutf8.FLAG_HAS_SURROGATES else: flag = rutf8.FLAG_REGULAR return space.newutf8(s, 1, flag) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -516,8 +516,9 @@ return w_obj.listview_unicode() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: return w_obj.listview_unicode() - #if isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj): - # return w_obj.listview_unicode() + if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) + and w_obj.is_ascii()): + return w_obj.listview_unicode() if isinstance(w_obj, W_ListObject) and self._uses_list_iter(w_obj): return w_obj.getitems_unicode() return None diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ 
b/pypy/objspace/std/test/test_unicodeobject.py @@ -27,7 +27,6 @@ assert len(warnings) == 2 def test_listview_unicode(self): - py.test.skip("skip for new") w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) assert self.space.listview_unicode(w_str) == list(u"abcd") @@ -662,7 +661,6 @@ assert unicode('+AB', 'utf-7', 'replace') == u'\ufffd' def test_codecs_utf8(self): - skip("unskip this before merge") assert u''.encode('utf-8') == '' assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac' assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82' @@ -695,7 +693,6 @@ assert unicode('\xe2\x82\xac', 'utf-8') == u'\u20ac' def test_codecs_errors(self): - skip("some nonsense in handling of ignore and replace") # Error handling (encoding) raises(UnicodeError, u'Andr\202 x'.encode, 'ascii') raises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -93,6 +93,8 @@ return space.text_w(space.str(self)) def utf8_w(self, space): + if self._has_surrogates(): + return rutf8.reencode_utf8_with_surrogates(self._utf8) return self._utf8 def readbuf_w(self, space): @@ -115,8 +117,8 @@ charbuf_w = str_w def listview_unicode(self): - XXX # fix at some point - return _create_list_from_unicode(self._value) + assert self.is_ascii() + return _create_list_from_unicode(self._utf8) def ord(self, space): if self._len() != 1: @@ -410,7 +412,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_NORMAL) + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -632,7 +634,7 @@ return rutf8.FLAG_REGULAR def _get_flag(self): - if self._is_ascii(): + if self.is_ascii(): return rutf8.FLAG_ASCII elif self._has_surrogates(): return rutf8.FLAG_HAS_SURROGATES @@ -977,7 +979,7 @@ end = rutf8.next_codepoint_pos(self._utf8, start) return W_UnicodeObject(self._utf8[start:end], 1, self._get_flag()) - def _is_ascii(self): + def is_ascii(self): return self._index_storage is rutf8.UTF8_IS_ASCII def _has_surrogates(self): @@ -986,7 +988,8 @@ self._index_storage.flag == rutf8.FLAG_HAS_SURROGATES)) def _index_to_byte(self, index): - if self._is_ascii(): + if self.is_ascii(): + assert index >= 0 return index return rutf8.codepoint_position_at_index( self._utf8, self._get_index_storage(), index) @@ -1195,7 +1198,7 @@ assert False, "always raises" return space.newbytes(s) if ((encoding is None and space.sys.defaultencoding == 'utf8') or - encoding == 'utf-8'): + encoding == 'utf-8' or encoding == 'utf8'): return space.newbytes(space.utf8_w(w_object)) if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -388,6 +388,34 @@ assert pos - continuation_bytes >= 0 return pos - continuation_bytes, flag +def reencode_utf8_with_surrogates(utf8): + """ Receiving valid UTF8 which contains surrogates, combine surrogate + pairs into correct UTF8 with pairs collpased. 
This is a rare case + and you should not be using surrogate pairs in the first place, + so the performance here is a bit secondary + """ + s = StringBuilder(len(utf8)) + stop = len(utf8) + i = 0 + while i < stop: + uchr = codepoint_at_pos(utf8, i) + if 0xD800 <= uchr <= 0xDBFF: + high = uchr + i = next_codepoint_pos(utf8, i) + if i >= stop: + unichr_as_utf8_append(s, uchr, True) + break + low = codepoint_at_pos(utf8, i) + if 0xDC00 <= low <= 0xDFFF: + uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00) + i = next_codepoint_pos(utf8, i) + # else not really a surrogate pair, just append high + else: + i = next_codepoint_pos(utf8, i) + unichr_as_utf8_append(s, uchr, True) + return s.build() + + @jit.elidable def codepoints_in_utf8(value, start=0, end=sys.maxint): """Return the number of codepoints in the UTF-8 byte string diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -40,9 +40,7 @@ def _test_check_utf8(s, allow_surrogates): def _has_surrogates(s): for u in s.decode('utf8'): - if 0xD800 <= ord(u) <= 0xDB7F: - return True - if 0xDC00 <= ord(u) <= 0xDBFF: + if 0xD800 <= ord(u) <= 0xDFFF: return True return False From pypy.commits at gmail.com Sat Nov 4 18:16:53 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:16:53 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: progress on having flags correctly propagated, almost there Message-ID: <59fe3c55.5d87df0a.896e7.cb97@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92934:29ce3a4ea76f Date: 2017-11-04 14:38 +0100 http://bitbucket.org/pypy/pypy/changeset/29ce3a4ea76f/ Log: progress on having flags correctly propagated, almost there diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -4,3 +4,7 @@ if one is not already readily available * fix _pypyjson * fix cpyext +* write the correct jit_elidable in _get_index_storage +* better flag handling in split/splitlines maybe? 
+* find all the fast-paths that we want to do with utf8 (we only do + utf-8 now, not UTF8 or utf8) for decode/encode diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1764,8 +1764,10 @@ return self.realutf8_w(w_obj).decode('utf8') def newunicode(self, u): + from pypy.interpreter import unicodehelper assert isinstance(u, unicode) - return self.newutf8(u.encode("utf8"), len(u)) + # XXX let's disallow that + return self.newutf8(u.encode("utf8"), len(u), unicodehelper._get_flag(u)) def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -59,10 +59,11 @@ else: substr = decode_unicode_utf8(space, s, ps, q) if rawmode: - v, length = unicodehelper.decode_raw_unicode_escape(space, substr) + r = unicodehelper.decode_raw_unicode_escape(space, substr) else: - v, length = unicodehelper.decode_unicode_escape(space, substr) - return space.newutf8(v, length) + r = unicodehelper.decode_unicode_escape(space, substr) + v, length, flag = r + return space.newutf8(v, length, flag) need_encoding = (encoding is not None and encoding != "utf-8" and encoding != "utf8" and diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -20,11 +20,11 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, + def raise_unicode_exception_encode(errors, encoding, msg, w_u, startingpos, endingpos): raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(u, u_len), + w_u, space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -41,6 +41,21 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, encoding, errors) +def _has_surrogate(u): + for c in u: + if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + return True + return False + +def _get_flag(u): + flag = rutf8.FLAG_ASCII + for c in u: + if 0xDB80 <= ord(c) <= 0xCBFF or 0xD800 <= ord(c) <= 0xDB7F: + return rutf8.FLAG_HAS_SURROGATES + if ord(c) >= 0x80: + flag = rutf8.FLAG_REGULAR + return flag + # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) @@ -52,7 +67,14 @@ final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, unicodedata_handler=unicodedata_handler) # XXX argh. we want each surrogate to be encoded separately - return ''.join([u.encode('utf8') for u in result_u]), len(result_u) + utf8 = ''.join([u.encode('utf8') for u in result_u]) + if rutf8.first_non_ascii_char(utf8) == -1: + flag = rutf8.FLAG_ASCII + elif _has_surrogate(result_u): + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + return utf8, len(result_u), flag def decode_raw_unicode_escape(space, string): # XXX pick better length, maybe @@ -61,7 +83,14 @@ string, len(string), "strict", final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle) # XXX argh. 
we want each surrogate to be encoded separately - return ''.join([u.encode('utf8') for u in result_u]), len(result_u) + utf8 = ''.join([u.encode('utf8') for u in result_u]) + if rutf8.first_non_ascii_char(utf8) == -1: + flag = rutf8.FLAG_ASCII + elif _has_surrogate(result_u): + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + return utf8, len(result_u), flag def check_ascii_or_raise(space, string): try: @@ -78,12 +107,12 @@ # you still get two surrogate unicode characters in the result. # These are the Python2 rules; Python3 differs. try: - length = rutf8.check_utf8(string, allow_surrogates=True) + length, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, e.pos, e.pos + 1) assert False, "unreachable" - return length + return length, flag def encode_utf8(space, uni): # DEPRECATED @@ -116,7 +145,7 @@ except rutf8.CheckError: w = DecodeWrapper((errorhandler)) u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) # XXX wrappers, think about speed @@ -139,14 +168,14 @@ w = DecodeWrapper(errorhandler) u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): w = DecodeWrapper(errorhandler) u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, w.handle, ud_handler) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) def setup_new_encoders(encoding): encoder_name = 'utf8_encode_' + encoding @@ -160,7 +189,7 @@ def decoder(s, slen, errors, final, errorhandler): w = DecodeWrapper((errorhandler)) u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u) + return u.encode('utf8'), pos, len(u), _get_flag(u) encoder.__name__ = encoder_name decoder.__name__ = decoder_name if encoder_name not in globals(): diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -28,7 +28,13 @@ s = rutf8.unichr_as_utf8(code, allow_surrogates=True) except ValueError: raise oefmt(space.w_ValueError, "unichr() arg out of range") - return space.newutf8(s, 1) + if code < 0x80: + flag = rutf8.FLAG_ASCII + elif 0xDB80 <= code <= 0xCBFF or 0xD800 <= code <= 0xDB7F: + flag = rutf8.FLAG_HAS_SURROGATE + else: + flag = rutf8.FLAG_REGULAR + return space.newutf8(s, 1, flag) def len(space, w_obj): "len(object) -> integer\n\nReturn the number of items of a sequence or mapping." 
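The same three-argument pattern repeats through the rest of this commit; as a
minimal sketch (not itself part of the diff, with 'space' standing in for any
object space and 's' for a byte string):

    from rpython.rlib import rutf8

    def wrap_checked_utf8(space, s):
        # validate once, then hand newutf8() the explicit length and flag
        lgt, flag = rutf8.check_utf8(s, allow_surrogates=True)
        return space.newutf8(s, lgt, flag)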
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -39,8 +39,8 @@ w_input = space.newbytes(input) else: w_cls = space.w_UnicodeEncodeError - length = rutf8.check_utf8(input, allow_surrogates=True) - w_input = space.newutf8(input, length) + length, flag = rutf8.check_utf8(input, allow_surrogates=True) + w_input = space.newutf8(input, length, flag) w_exc = space.call_function( w_cls, space.newtext(encoding), @@ -189,7 +189,7 @@ def ignore_errors(space, w_exc): check_exception(space, w_exc) w_end = space.getattr(w_exc, space.newtext('end')) - return space.newtuple([space.newutf8('', 0), w_end]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), w_end]) REPLACEMENT = u'\ufffd'.encode('utf8') @@ -200,13 +200,13 @@ size = space.int_w(w_end) - space.int_w(w_start) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): text = '?' * size - return space.newtuple([space.newutf8(text, size), w_end]) + return space.newtuple([space.newutf8(text, size, rutf8.FLAG_ASCII), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeDecodeError): text = REPLACEMENT - return space.newtuple([space.newutf8(text, 1), w_end]) + return space.newtuple([space.newutf8(text, 1, rutf8.FLAG_REGULAR), w_end]) elif space.isinstance_w(w_exc, space.w_UnicodeTranslateError): text = REPLACEMENT * size - return space.newtuple([space.newutf8(text, size), w_end]) + return space.newtuple([space.newutf8(text, size, rutf8.FLAG_REGULAR), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -403,9 +403,9 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = getattr(unicodehelper, rname) - result, consumed, length = func(string, len(string), errors, - final, state.decode_error_handler) - return space.newtuple([space.newutf8(result, length), + result, consumed, length, flag = func(string, len(string), errors, + final, state.decode_error_handler) + return space.newtuple([space.newutf8(result, length, flag), space.newint(consumed)]) wrap_decoder.func_name = rname globals()[name] = wrap_decoder @@ -448,7 +448,7 @@ # "allow_surrogates=True" @unwrap_spec(utf8='utf8', errors='text_or_none') def utf_8_encode(space, utf8, errors="strict"): - length = rutf8.check_utf8(utf8, allow_surrogates=True) + length, _ = rutf8.check_utf8(utf8, allow_surrogates=True) return space.newtuple([space.newbytes(utf8), space.newint(length)]) #@unwrap_spec(uni=unicode, errors='text_or_none') #def utf_8_encode(space, uni, errors="strict"): @@ -474,16 +474,17 @@ state = space.fromcache(CodecState) # call the fast version for checking try: - lgt = rutf8.check_utf8(string, allow_surrogates=True) + lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases + xxx res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt), space.newint(consumed)]) else: - return space.newtuple([space.newutf8(string, lgt), + return space.newtuple([space.newutf8(string, lgt, flag), space.newint(len(string))]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py --- a/pypy/objspace/std/marshal_impl.py +++ b/pypy/objspace/std/marshal_impl.py @@ -403,8 
+403,8 @@ @unmarshaller(TYPE_UNICODE) def unmarshal_unicode(space, u, tc): arg = u.get_str() - length = unicodehelper.check_utf8_or_raise(space, arg) - return space.newutf8(arg, length) + length, flag = unicodehelper.check_utf8_or_raise(space, arg) + return space.newutf8(arg, length, flag) @marshaller(W_SetObject) def marshal_set(space, w_set, m): diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -317,8 +317,8 @@ for utf in lst: assert utf is not None assert isinstance(utf, str) - length = rutf8.check_utf8(utf, allow_surrogates=True) - res_w.append(self.newutf8(utf, length)) + length, flag = rutf8.check_utf8(utf, allow_surrogates=True) + res_w.append(self.newutf8(utf, length, flag)) return self.newlist(res_w) def newlist_int(self, list_i): @@ -369,10 +369,10 @@ return self.w_None return self.newtext(s) - def newutf8(self, utf8s, length): + def newutf8(self, utf8s, length, flag): assert utf8s is not None assert isinstance(utf8s, str) - return W_UnicodeObject(utf8s, length) + return W_UnicodeObject(utf8s, length, flag) def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -3,6 +3,7 @@ import py import sys from hypothesis import given, strategies, settings, example +from rpython.rlib import rutf8 from pypy.interpreter.error import OperationError @@ -27,12 +28,12 @@ def test_listview_unicode(self): py.test.skip("skip for new") - w_str = self.space.wrap(u'abcd') + w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) assert self.space.listview_unicode(w_str) == list(u"abcd") def test_new_shortcut(self): space = self.space - w_uni = self.space.wrap(u'abcd') + w_uni = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) w_new = space.call_method( space.w_unicode, "__new__", space.w_unicode, w_uni) assert w_new is w_uni @@ -44,8 +45,8 @@ return # skip this case v = u[start : start + len1] space = self.space - w_u = space.wrap(u) - w_v = space.wrap(v) + w_u = space.newutf8(u.encode('utf8'), len(u), rutf8.FLAG_REGULAR) + w_v = space.newutf8(v.encode('utf8'), len(v), rutf8.FLAG_REGULAR) expected = u.find(v, start, start + len1) try: w_index = space.call_method(w_u, 'index', w_v, diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -36,14 +36,24 @@ _immutable_fields_ = ['_utf8'] @enforceargs(utf8str=str) - def __init__(self, utf8str, length): + def __init__(self, utf8str, length, flag): assert isinstance(utf8str, str) assert length >= 0 self._utf8 = utf8str self._length = length - self._index_storage = rutf8.null_storage() - #if not we_are_translated(): - # assert rutf8.check_utf8(utf8str, allow_surrogates=True) == length + if flag == rutf8.FLAG_ASCII: + self._index_storage = rutf8.UTF8_IS_ASCII + elif flag == rutf8.FLAG_HAS_SURROGATES: + self._index_storage = rutf8.UTF8_HAS_SURROGATES + else: + assert flag == rutf8.FLAG_REGULAR + self._index_storage = rutf8.null_storage() + # the storage can be one of: + # - null, unicode with no surrogates + # - rutf8.UTF8_HAS_SURROGATES + # - rutf8.UTF8_IS_ASCII + # - malloced object, which means it has index, then + # _index_storage.flags determines the kind def __repr__(self): """representation for debugging 
purposes""" @@ -222,7 +232,11 @@ assert isinstance(w_value, W_UnicodeObject) w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype) - W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length) + W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length, + w_value._get_flag()) + if w_value._index_storage: + # copy the storage if it's there + w_newobj._index_storage = w_value._index_storage return w_newobj def descr_repr(self, space): @@ -326,29 +340,33 @@ def descr_swapcase(self, space): selfvalue = self._utf8 builder = StringBuilder(len(selfvalue)) + flag = self._get_flag() i = 0 while i < len(selfvalue): ch = rutf8.codepoint_at_pos(selfvalue, i) i = rutf8.next_codepoint_pos(selfvalue, i) if unicodedb.isupper(ch): - rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(ch)) + ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(ch)) - else: - rutf8.unichr_as_utf8_append(builder, ch) - return W_UnicodeObject(builder.build(), self._length) + ch = unicodedb.toupper(ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + rutf8.unichr_as_utf8_append(builder, ch) + return W_UnicodeObject(builder.build(), self._length, flag) def descr_title(self, space): if len(self._utf8) == 0: return self - return W_UnicodeObject(self.title(self._utf8), self._len()) + utf8, flag = self.title_unicode(self._utf8) + return W_UnicodeObject(utf8, self._len(), flag) @jit.elidable - def title(self, value): + def title_unicode(self, value): input = self._utf8 builder = StringBuilder(len(input)) i = 0 previous_is_cased = False + flag = self._get_flag() while i < len(input): ch = rutf8.codepoint_at_pos(input, i) i = rutf8.next_codepoint_pos(input, i) @@ -356,14 +374,17 @@ ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build() + return builder.build(), flag def descr_translate(self, space, w_table): input = self._utf8 result = StringBuilder(len(input)) result_length = 0 + flag = self._get_flag() i = 0 while i < len(input): codepoint = rutf8.codepoint_at_pos(input, i) @@ -380,6 +401,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) + flag = self._combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -387,13 +409,15 @@ "character mapping must return integer, None " "or unicode") try: + if codepoint >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_NORMAL) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length) + return W_UnicodeObject(result.build(), result_length, flag) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -472,7 +496,7 @@ newlen += dist oldtoken = token - return W_UnicodeObject(expanded, newlen) + return W_UnicodeObject(expanded, newlen, self._get_flag()) _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): @@ -506,11 +530,14 @@ def descr_lower(self, space): builder = StringBuilder(len(self._utf8)) pos = 0 + flag = self._get_flag() while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) + 
if lower >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len()) + return W_UnicodeObject(builder.build(), self._len(), flag) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -595,6 +622,22 @@ return True return endswith(value, prefix, start, end) + @staticmethod + def _combine_flags(self_flag, other_flag): + if self_flag == rutf8.FLAG_ASCII and other_flag == rutf8.FLAG_ASCII: + return rutf8.FLAG_ASCII + elif (self_flag == rutf8.FLAG_HAS_SURROGATES or + other_flag == rutf8.FLAG_HAS_SURROGATES): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + + def _get_flag(self): + if self._is_ascii(): + return rutf8.FLAG_ASCII + elif self._has_surrogates(): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + def descr_add(self, space, w_other): try: w_other = self.convert_arg_to_w_unicode(space, w_other) @@ -602,8 +645,9 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise + flag = self._combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, - self._len() + w_other._len()) + self._len() + w_other._len(), flag) @jit.look_inside_iff(lambda self, space, list_w, size: jit.loop_unrolling_heuristic(list_w, size)) @@ -613,6 +657,7 @@ prealloc_size = len(value) * (size - 1) unwrapped = newlist_hint(size) + flag = self._get_flag() for i in range(size): w_s = list_w[i] check_item = self._join_check_item(space, w_s) @@ -625,6 +670,7 @@ # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) + flag = self._combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -634,7 +680,7 @@ if value and i != 0: sb.append(value) sb.append(unwrapped[i]) - return W_UnicodeObject(sb.build(), lgt) + return W_UnicodeObject(sb.build(), lgt, flag) @unwrap_spec(keepends=bool) def descr_splitlines(self, space, keepends=False): @@ -663,28 +709,33 @@ lgt += line_end_chars assert eol >= 0 assert sol >= 0 - strs_w.append(W_UnicodeObject(value[sol:eol], lgt)) + # XXX we can do better with flags here, if we want to + strs_w.append(W_UnicodeObject(value[sol:eol], lgt, self._get_flag())) return space.newlist(strs_w) def descr_upper(self, space): value = self._utf8 builder = StringBuilder(len(value)) + flag = self._get_flag() i = 0 while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) + uchar = unicodedb.toupper(uchar) + if uchar >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar)) - return W_UnicodeObject(builder.build(), self._length) + rutf8.unichr_as_utf8_append(builder, uchar) + return W_UnicodeObject(builder.build(), self._length, flag) @unwrap_spec(width=int) def descr_zfill(self, space, width): selfval = self._utf8 if len(selfval) == 0: - return W_UnicodeObject('0' * width, width) + return W_UnicodeObject('0' * width, width, rutf8.FLAG_ASCII) num_zeros = width - self._len() if num_zeros <= 0: # cannot return self, in case it is a subclass of str - return W_UnicodeObject(selfval, self._len()) + return W_UnicodeObject(selfval, self._len(), self._get_flag()) builder = StringBuilder(num_zeros + len(selfval)) if len(selfval) > 0 and 
(selfval[0] == '+' or selfval[0] == '-'): # copy sign to first position @@ -694,7 +745,7 @@ start = 0 builder.append_multiple_char('0', num_zeros) builder.append_slice(selfval, start, len(selfval)) - return W_UnicodeObject(builder.build(), width) + return W_UnicodeObject(builder.build(), width, self._get_flag()) @unwrap_spec(maxsplit=int) def descr_split(self, space, w_sep=None, maxsplit=-1): @@ -753,7 +804,7 @@ break i += 1 byte_pos = self._index_to_byte(start + i * step) - return W_UnicodeObject(builder.build(), sl) + return W_UnicodeObject(builder.build(), sl, self._get_flag()) def descr_getslice(self, space, w_start, w_stop): start, stop = normalize_simple_slice( @@ -770,22 +821,30 @@ assert stop >= 0 byte_start = self._index_to_byte(start) byte_stop = self._index_to_byte(stop) - return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start) + return W_UnicodeObject(self._utf8[byte_start:byte_stop], stop - start, + self._get_flag()) def descr_capitalize(self, space): value = self._utf8 if len(value) == 0: return self._empty() + flag = self._get_flag() builder = StringBuilder(len(value)) uchar = rutf8.codepoint_at_pos(value, 0) i = rutf8.next_codepoint_pos(value, 0) - rutf8.unichr_as_utf8_append(builder, unicodedb.toupper(uchar)) + ch = unicodedb.toupper(uchar) + rutf8.unichr_as_utf8_append(builder, ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, unicodedb.tolower(uchar)) - return W_UnicodeObject(builder.build(), self._len()) + ch = unicodedb.tolower(uchar) + rutf8.unichr_as_utf8_append(builder, ch) + if ch >= 0x80: + flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): @@ -804,7 +863,7 @@ centered = value d = 0 - return W_UnicodeObject(centered, self._len() + d) + return W_UnicodeObject(centered, self._len() + d, self._get_flag()) def descr_count(self, space, w_sub, w_start=None, w_end=None): value = self._utf8 @@ -830,11 +889,11 @@ if pos < 0: return space.newtuple([self, self._empty(), self._empty()]) else: - lgt = rutf8.check_utf8(value, True, stop=pos) + lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt), w_sub, + [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen)]) + self._len() - lgt - sublen, self._get_flag())]) def descr_rpartition(self, space, w_sub): value = self._utf8 @@ -848,11 +907,11 @@ if pos < 0: return space.newtuple([self._empty(), self._empty(), self]) else: - lgt = rutf8.check_utf8(value, True, stop=pos) + lgt, _ = rutf8.check_utf8(value, True, stop=pos) return space.newtuple( - [W_UnicodeObject(value[0:pos], lgt), w_sub, + [W_UnicodeObject(value[0:pos], lgt, self._get_flag()), w_sub, W_UnicodeObject(value[pos + len(sub._utf8):len(value)], - self._len() - lgt - sublen)]) + self._len() - lgt - sublen, self._get_flag())]) @unwrap_spec(count=int) def descr_replace(self, space, w_old, w_new, count=-1): @@ -870,8 +929,9 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") + flag = self._combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) - return W_UnicodeObject(res, 
newlength) + return W_UnicodeObject(res, newlength, flag) def descr_mul(self, space, w_times): try: @@ -883,16 +943,29 @@ if times <= 0: return self._empty() if len(self._utf8) == 1: - return W_UnicodeObject(self._utf8[0] * times, times) - return W_UnicodeObject(self._utf8 * times, times * self._len()) + return W_UnicodeObject(self._utf8[0] * times, times, + self._get_flag()) + return W_UnicodeObject(self._utf8 * times, times * self._len(), + self._get_flag()) descr_rmul = descr_mul def _get_index_storage(self): - storage = jit.conditional_call_elidable(self._index_storage, - rutf8.create_utf8_index_storage, self._utf8, self._length) + # XXX write the correct jit.elidable + condition = (self._index_storage == rutf8.null_storage() or + not bool(self._index_storage.contents)) + if condition: + storage = rutf8.create_utf8_index_storage(self._utf8, self._length) + else: + storage = self._index_storage if not jit.isconstant(self): + prev_storage = self._index_storage self._index_storage = storage + if prev_storage == rutf8.UTF8_HAS_SURROGATES: + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + self._index_storage.flag = flag return storage def _getitem_result(self, space, index): @@ -902,9 +975,19 @@ raise oefmt(space.w_IndexError, "string index out of range") start = self._index_to_byte(index) end = rutf8.next_codepoint_pos(self._utf8, start) - return W_UnicodeObject(self._utf8[start:end], 1) + return W_UnicodeObject(self._utf8[start:end], 1, self._get_flag()) + + def _is_ascii(self): + return self._index_storage is rutf8.UTF8_IS_ASCII + + def _has_surrogates(self): + return (self._index_storage is rutf8.UTF8_HAS_SURROGATES or + (bool(self._index_storage) and + self._index_storage.flag == rutf8.FLAG_HAS_SURROGATES)) def _index_to_byte(self, index): + if self._is_ascii(): + return index return rutf8.codepoint_position_at_index( self._utf8, self._get_index_storage(), index) @@ -967,6 +1050,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") + flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -974,9 +1058,9 @@ value = d * w_fillchar._utf8[0] + value else: value = d * w_fillchar._utf8 + value - return W_UnicodeObject(value, width) + return W_UnicodeObject(value, width, flag) - return W_UnicodeObject(value, lgt) + return W_UnicodeObject(value, lgt, flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_ljust(self, space, width, w_fillchar): @@ -985,6 +1069,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") + flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: @@ -992,9 +1077,9 @@ value = value + d * w_fillchar._utf8[0] else: value = value + d * w_fillchar._utf8 - return W_UnicodeObject(value, width) + return W_UnicodeObject(value, width, flag) - return W_UnicodeObject(value, self._len()) + return W_UnicodeObject(value, self._len(), flag) def _utf8_sliced(self, start, stop, lgt): assert start >= 0 @@ -1002,7 +1087,7 @@ #if start == 0 and stop == len(s) and space.is_w(space.type(orig_obj), # space.w_bytes): # return orig_obj - return W_UnicodeObject(self._utf8[start:stop], lgt) + return W_UnicodeObject(self._utf8[start:stop], lgt, self._get_flag()) def _strip_none(self, space, left, right): "internal function called by str_xstrip methods" @@ -1050,7 +1135,7 @@ return 
self._utf8_sliced(lpos, rpos, lgt) def descr_getnewargs(self, space): - return space.newtuple([W_UnicodeObject(self._utf8, self._length)]) + return space.newtuple([W_UnicodeObject(self._utf8, self._length, self._get_flag())]) @@ -1135,11 +1220,11 @@ if encoding == 'ascii': s = space.charbuf_w(w_obj) unicodehelper.check_ascii_or_raise(space, s) - return space.newutf8(s, len(s)) + return space.newutf8(s, len(s), rutf8.FLAG_ASCII) if encoding == 'utf-8': s = space.charbuf_w(w_obj) - lgt = unicodehelper.check_utf8_or_raise(space, s) - return space.newutf8(s, lgt) + lgt, flag = unicodehelper.check_utf8_or_raise(space, s) + return space.newutf8(s, lgt, flag) w_codecs = space.getbuiltinmodule("_codecs") w_decode = space.getattr(w_codecs, space.newtext("decode")) if errors is None: @@ -1194,7 +1279,7 @@ return unicode_from_encoded_object(space, w_bytes, encoding, "strict") s = space.bytes_w(w_bytes) unicodehelper.check_ascii_or_raise(space, s) - return W_UnicodeObject(s, len(s)) + return W_UnicodeObject(s, len(s), rutf8.FLAG_ASCII) class UnicodeDocstrings: @@ -1741,7 +1826,7 @@ return [s for s in value] -W_UnicodeObject.EMPTY = W_UnicodeObject('', 0) +W_UnicodeObject.EMPTY = W_UnicodeObject('', 0, rutf8.FLAG_ASCII) # Helper for converting int/long diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -305,14 +305,14 @@ def check_utf8(s, allow_surrogates, start=0, stop=-1): """Check that 's' is a utf-8-encoded byte string. - Returns the length (number of chars) and flags or raise CheckError. + Returns the length (number of chars) and flag or raise CheckError. If allow_surrogates is False, then also raise if we see any. Note also codepoints_in_utf8(), which also computes the length faster by assuming that 's' is valid utf-8. 
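    (Note added in editing, not part of the original docstring: the index
    storage hunks further down in this diff restructure UTF8_INDEX_STORAGE
    into a GcStruct holding a 'flag' plus a 'contents' array with one entry
    per block of 64 codepoints; each entry keeps a byte position anchoring
    the block ('baseindex') and sixteen one-byte offsets, one for every 4th
    codepoint.  A lookup then picks storage.contents[index >> 6], adds the
    cached offset for the (index >> 2) & 0x0F slot, and walks at most a few
    codepoints with prev_codepoint_pos()/next_codepoint_pos(), so codepoint
    indexing stays close to constant time without re-decoding the string.)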
""" - res, flags = _check_utf8(s, allow_surrogates, start, stop) + res, flag = _check_utf8(s, allow_surrogates, start, stop) if res >= 0: - return res, flags + return res, flag raise CheckError(~res) @jit.elidable @@ -416,12 +416,13 @@ return False -UTF8_INDEX_STORAGE = lltype.GcArray(lltype.Struct( - 'utf8_loc', +UTF8_INDEX_STORAGE = lltype.GcStruct('utf8_loc', + ('flag', lltype.Signed), + ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct( + 'utf8_loc_elem', ('baseindex', lltype.Signed), - ('flag', lltype.Signed), - ('ofs', lltype.FixedSizeArray(lltype.Char, 16)) - )) + ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) + )))) FLAG_REGULAR = 0 FLAG_HAS_SURROGATES = 1 @@ -429,43 +430,47 @@ # note that we never need index storage if we're pure ascii, but it's useful # for passing into W_UnicodeObject.__init__ -ASCII_INDEX_STORAGE_BLOCKS = 5 -ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE, - ASCII_INDEX_STORAGE_BLOCKS, - immortal=True) -for _i in range(ASCII_INDEX_STORAGE_BLOCKS): - ASCII_INDEX_STORAGE[_i].baseindex = _i * 64 - for _j in range(16): - ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1) +#ASCII_INDEX_STORAGE_BLOCKS = 5 +#ASCII_INDEX_STORAGE = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, +# ASCII_INDEX_STORAGE_BLOCKS, +# immortal=True) +#for _i in range(ASCII_INDEX_STORAGE_BLOCKS): +# ASCII_INDEX_STORAGE[_i].baseindex = _i * 64 +# for _j in range(16): +# ASCII_INDEX_STORAGE[_i].ofs[_j] = chr(_j * 4 + 1) def null_storage(): return lltype.nullptr(UTF8_INDEX_STORAGE) -UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) -UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, 0, immortal=True) +UTF8_IS_ASCII = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) +UTF8_IS_ASCII.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) +UTF8_HAS_SURROGATES = lltype.malloc(UTF8_INDEX_STORAGE, immortal=True) +UTF8_HAS_SURROGATES.contents = lltype.nullptr(UTF8_INDEX_STORAGE.contents.TO) def create_utf8_index_storage(utf8, utf8len): """ Create an index storage which stores index of each 4th character in utf8 encoded unicode string. """ - if len(utf8) == utf8len < ASCII_INDEX_STORAGE_BLOCKS * 64: - return ASCII_INDEX_STORAGE +# if len(utf8) == utf8len < ASCII_INDEX_STORAGE_BLOCKS * 64: +# return ASCII_INDEX_STORAGE arraysize = utf8len // 64 + 1 - storage = lltype.malloc(UTF8_INDEX_STORAGE, arraysize) + storage = lltype.malloc(UTF8_INDEX_STORAGE) + contents = lltype.malloc(UTF8_INDEX_STORAGE.contents.TO, arraysize) + storage.contents = contents baseindex = 0 current = 0 while True: - storage[current].baseindex = baseindex + contents[current].baseindex = baseindex next = baseindex for i in range(16): if utf8len == 0: next += 1 # assume there is an extra '\x00' character else: next = next_codepoint_pos(utf8, next) - storage[current].ofs[i] = chr(next - baseindex) + contents[current].ofs[i] = chr(next - baseindex) utf8len -= 4 if utf8len < 0: - assert current + 1 == len(storage) + assert current + 1 == len(contents) break next = next_codepoint_pos(utf8, next) next = next_codepoint_pos(utf8, next) @@ -485,8 +490,8 @@ this function. 
""" current = index >> 6 - ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage[current].baseindex + ofs + ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage.contents[current].baseindex + ofs index &= 0x3 if index == 0: return prev_codepoint_pos(utf8, bytepos) @@ -504,8 +509,8 @@ storage of type UTF8_INDEX_STORAGE """ current = index >> 6 - ofs = ord(storage[current].ofs[(index >> 2) & 0x0F]) - bytepos = storage[current].baseindex + ofs + ofs = ord(storage.contents[current].ofs[(index >> 2) & 0x0F]) + bytepos = storage.contents[current].baseindex + ofs index &= 0x3 if index == 0: return codepoint_before_pos(utf8, bytepos) From pypy.commits at gmail.com Sat Nov 4 18:17:03 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:03 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: whack enough to get somewhere with the list strategy - just for ascii-unicode so far Message-ID: <59fe3c5f.3bb0df0a.1515b.2ae9@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92939:0aeb46cc86b0 Date: 2017-11-04 19:37 +0100 http://bitbucket.org/pypy/pypy/changeset/0aeb46cc86b0/ Log: whack enough to get somewhere with the list strategy - just for ascii-unicode so far diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1054,7 +1054,7 @@ """ return None - def listview_unicode(self, w_list): + def listview_utf8(self, w_list): """ Return a list of unwrapped unicode out of a list of unicode. If the argument is not a list or does not contain only unicode, return None. May return None anyway. diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -10,7 +10,7 @@ import operator import sys -from rpython.rlib import debug, jit, rerased +from rpython.rlib import debug, jit, rerased, rutf8 from rpython.rlib.listsort import make_timsort_class from rpython.rlib.objectmodel import ( import_from_mixin, instantiate, newlist_hint, resizelist_hint, specialize) @@ -95,10 +95,11 @@ else: return space.fromcache(BytesListStrategy) - elif False and type(w_firstobj) is W_UnicodeObject: # disable unicode list strat + elif type(w_firstobj) is W_UnicodeObject and w_firstobj.is_ascii(): # check for all-unicodes for i in range(1, len(list_w)): - if type(list_w[i]) is not W_UnicodeObject: + item = list_w[i] + if type(item) is not W_UnicodeObject or not item.is_ascii(): break else: return space.fromcache(UnicodeListStrategy) @@ -196,7 +197,6 @@ @staticmethod def newlist_unicode(space, list_u): - xxxx strategy = space.fromcache(UnicodeListStrategy) storage = strategy.erase(list_u) return W_ListObject.from_storage_and_strategy(space, storage, strategy) @@ -349,10 +349,10 @@ not use the list strategy, return None.""" return self.strategy.getitems_bytes(self) - def getitems_unicode(self): + def getitems_utf8(self): """Return the items in the list as unwrapped unicodes. If the list does not use the list strategy, return None.""" - return self.strategy.getitems_unicode(self) + return self.strategy.getitems_utf8(self) def getitems_int(self): """Return the items in the list as unwrapped ints. 
If the list does not @@ -813,7 +813,7 @@ def getitems_bytes(self, w_list): return None - def getitems_unicode(self, w_list): + def getitems_utf8(self, w_list): return None def getitems_int(self, w_list): @@ -954,8 +954,8 @@ strategy = self.space.fromcache(IntegerListStrategy) elif type(w_item) is W_BytesObject: strategy = self.space.fromcache(BytesListStrategy) - #elif type(w_item) is W_UnicodeObject: - # strategy = self.space.fromcache(UnicodeListStrategy) + elif type(w_item) is W_UnicodeObject and w_item.is_ascii(): + strategy = self.space.fromcache(UnicodeListStrategy) elif type(w_item) is W_FloatObject: strategy = self.space.fromcache(FloatListStrategy) else: @@ -1025,9 +1025,8 @@ w_list.lstorage = strategy.erase(byteslist[:]) return - if False: - unilist = space.listview_unicode(w_iterable) - if unilist is not None: + unilist = space.listview_utf8(w_iterable) + if unilist is not None: w_list.strategy = strategy = space.fromcache(UnicodeListStrategy) # need to copy because intlist can share with w_iterable w_list.lstorage = strategy.erase(unilist[:]) @@ -1995,11 +1994,11 @@ class UnicodeListStrategy(ListStrategy): import_from_mixin(AbstractUnwrappedStrategy) - _none_value = u"" + _none_value = "" def wrap(self, stringval): assert stringval is not None - return self.space.newunicode(stringval) + return self.space.newutf8(stringval, len(stringval), rutf8.FLAG_ASCII) def unwrap(self, w_string): return self.space.utf8_w(w_string) @@ -2009,7 +2008,7 @@ unerase = staticmethod(unerase) def is_correct_type(self, w_obj): - return type(w_obj) is W_UnicodeObject + return type(w_obj) is W_UnicodeObject and w_obj.is_ascii() def list_is_correct_type(self, w_list): return w_list.strategy is self.space.fromcache(UnicodeListStrategy) @@ -2021,7 +2020,7 @@ if reverse: l.reverse() - def getitems_unicode(self, w_list): + def getitems_utf8(self, w_list): return self.unerase(w_list.lstorage) # _______________________________________________________ diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,7 +164,9 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - return self.newutf8(x.encode('utf8'), len(x), rutf8.FLAG_REGULAR) + from pypy.interpreter import unicodehelper + return self.newutf8(x.encode('utf8'), len(x), + unicodehelper._get_flag(x)) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): @@ -507,20 +509,20 @@ return w_obj.getitems_bytes() return None - def listview_unicode(self, w_obj): + def listview_utf8(self, w_obj): # note: uses exact type checking for objects with strategies, # and isinstance() for others. See test_listobject.test_uses_custom... 
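        # Note added in editing (not part of the committed patch): the unicode
        # strategies on this branch keep the *unwrapped* utf-8 bytes and wrap
        # them back with newutf8(s, len(s), rutf8.FLAG_ASCII), i.e. they assume
        # one byte per code point.  That assumption only holds for ASCII-only
        # strings, which is why these fast paths and the strategies'
        # is_correct_type() checks are guarded with is_ascii().  A rough
        # illustration of the invariant being relied on:
        #     u"abc".encode("utf-8")    == 'abc'         # 3 bytes, 3 code points
        #     u"ab\xe9".encode("utf-8") == 'ab\xc3\xa9'  # 4 bytes, 3 code points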
if type(w_obj) is W_ListObject: - return w_obj.getitems_unicode() + return w_obj.getitems_utf8() if type(w_obj) is W_DictObject: return w_obj.listview_unicode() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: return w_obj.listview_unicode() if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) and w_obj.is_ascii()): - return w_obj.listview_unicode() + return w_obj.listview_utf8() if isinstance(w_obj, W_ListObject) and self._uses_list_iter(w_obj): - return w_obj.getitems_unicode() + return w_obj.getitems_utf8() return None def listview_int(self, w_obj): diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1591,7 +1591,7 @@ w_set.sstorage = strategy.get_storage_from_unwrapped_list(byteslist) return - unicodelist = space.listview_unicode(w_iterable) + unicodelist = space.listview_utf8(w_iterable) if unicodelist is not None: strategy = space.fromcache(UnicodeSetStrategy) w_set.strategy = strategy diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -20,9 +20,9 @@ IntegerListStrategy) assert isinstance(W_ListObject(space, [wb('a'), wb('b')]).strategy, BytesListStrategy) - #assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, - # UnicodeListStrategy) - assert isinstance(W_ListObject(space, [space.newutf8('a', 1, 0), wb('b')]).strategy, + assert isinstance(W_ListObject(space, [w(u'a'), w(u'b')]).strategy, + UnicodeListStrategy) + assert isinstance(W_ListObject(space, [w(u'a'), wb('b')]).strategy, ObjectListStrategy) # mixed unicode and bytes def test_empty_to_any(self): @@ -47,7 +47,7 @@ l = W_ListObject(space, []) assert isinstance(l.strategy, EmptyListStrategy) l.append(w(u'a')) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l = W_ListObject(space, []) assert isinstance(l.strategy, EmptyListStrategy) @@ -74,7 +74,6 @@ assert isinstance(l.strategy, ObjectListStrategy) def test_unicode_to_any(self): - py.test.skip("disabled") space = self.space l = W_ListObject(space, [space.wrap(u'a'), space.wrap(u'b'), space.wrap(u'c')]) assert isinstance(l.strategy, UnicodeListStrategy) @@ -118,7 +117,7 @@ # UnicodeStrategy to ObjectStrategy l = W_ListObject(space, [w(u'a'),w(u'b'),w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.setitem(0, w(2)) assert isinstance(l.strategy, ObjectListStrategy) @@ -146,7 +145,7 @@ # UnicodeStrategy l = W_ListObject(space, [w(u'a'),w(u'b'),w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.insert(3, w(2)) assert isinstance(l.strategy, ObjectListStrategy) @@ -226,7 +225,7 @@ # UnicodeStrategy to ObjectStrategy l = W_ListObject(space, [w(u'a'), w(u'b'), w(u'c')]) - #assert isinstance(l.strategy, UnicodeListStrategy) + assert isinstance(l.strategy, UnicodeListStrategy) l.setslice(0, 1, 2, W_ListObject(space, [w(1), w(2), w(3)])) assert isinstance(l.strategy, ObjectListStrategy) @@ -276,7 +275,7 @@ l = W_ListObject(space, wrapitems([u"a",u"b",u"c",u"d",u"e"])) other = W_ListObject(space, wrapitems([u"a", u"b", u"c"])) keep_other_strategy(l, 0, 2, other.length(), other) - #assert l.strategy is space.fromcache(UnicodeListStrategy) + assert l.strategy is 
space.fromcache(UnicodeListStrategy) l = W_ListObject(space, wrapitems([1.1, 2.2, 3.3, 4.4, 5.5])) other = W_ListObject(space, []) @@ -346,7 +345,7 @@ empty = W_ListObject(space, []) assert isinstance(empty.strategy, EmptyListStrategy) empty.extend(W_ListObject(space, [w(u"a"), w(u"b"), w(u"c")])) - #assert isinstance(empty.strategy, UnicodeListStrategy) + assert isinstance(empty.strategy, UnicodeListStrategy) empty = W_ListObject(space, []) assert isinstance(empty.strategy, EmptyListStrategy) @@ -602,7 +601,7 @@ l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) l2 = W_ListObject(self.space, [self.space.newunicode(u"eins"), self.space.newunicode(u"zwei")]) - #assert isinstance(l2.strategy, UnicodeListStrategy) + assert isinstance(l2.strategy, UnicodeListStrategy) l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newunicode(u"zwei")]) assert isinstance(l3.strategy, ObjectListStrategy) @@ -613,11 +612,10 @@ assert space.listview_bytes(w_l) == ["a", "b"] def test_listview_unicode(self): - py.test.skip("disabled") space = self.space - assert space.listview_unicode(space.wrap(1)) == None + assert space.listview_utf8(space.wrap(1)) == None w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) - assert space.listview_unicode(w_l) == [u"a", u"b"] + assert space.listview_utf8(w_l) == ["a", "b"] def test_string_join_uses_listview_bytes(self): space = self.space @@ -626,7 +624,6 @@ assert space.str_w(space.call_method(space.wrap("c"), "join", w_l)) == "acb" # # the same for unicode - py.test.skip("disabled") w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) w_l.getitems = None assert space.unicode_w(space.call_method(space.wrap(u"c"), "join", w_l)) == u"acb" @@ -639,7 +636,6 @@ assert space.is_w(space.call_method(space.wrap(" -- "), "join", w_l), w_text) # # the same for unicode - py.test.skip("disabled") w_text = space.wrap(u"text") w_l = self.space.newlist([w_text]) w_l.getitems = None @@ -669,7 +665,6 @@ assert space.listview_bytes(w_l4) == ["a", "b", "c"] def test_unicode_uses_newlist_unicode(self): - py.test.skip("disabled") space = self.space w_u = space.wrap(u"a b c") space.newlist = None @@ -725,7 +720,6 @@ assert self.space.listview_bytes(w_l) == ["a", "b"] def test_listview_unicode_list(self): - py.test.skip("disabled") space = self.space w_l = W_ListObject(space, [space.wrap(u"a"), space.wrap(u"b")]) assert self.space.listview_unicode(w_l) == [u"a", u"b"] diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -116,7 +116,7 @@ charbuf_w = str_w - def listview_unicode(self): + def listview_utf8(self): assert self.is_ascii() return _create_list_from_unicode(self._utf8) @@ -502,9 +502,9 @@ _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): - l = space.listview_unicode(w_list) + l = space.listview_utf8(w_list) if l is not None: - assert False, "unreachable" + xxxx if len(l) == 1: return space.newunicode(l[0]) return space.newunicode(self._utf8).join(l) From pypy.commits at gmail.com Sat Nov 4 18:17:01 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:01 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fix enough to pass all the tests in test_unicodeobject Message-ID: <59fe3c5d.530a1c0a.f6334.38f2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92938:94c9ccfbd63c 
Date: 2017-11-04 19:17 +0100 http://bitbucket.org/pypy/pypy/changeset/94c9ccfbd63c/ Log: fix enough to pass all the tests in test_unicodeobject diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -69,7 +69,7 @@ final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, unicodedata_handler=unicodedata_handler) # XXX argh. we want each surrogate to be encoded separately - utf8 = ''.join([u.encode('utf8') for u in result_u]) + utf8 = result_u.encode('utf8') if rutf8.first_non_ascii_char(utf8) == -1: flag = rutf8.FLAG_ASCII elif _has_surrogate(result_u): diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -475,12 +475,11 @@ # call the fast version for checking try: lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) - except rutf8.CheckError as e: + except rutf8.CheckError: # XXX do the way around runicode - we can optimize it later if we # decide we care about obscure cases - res, consumed, lgt = unicodehelper.str_decode_utf8(string, len(string), - errors, final, state.decode_error_handler) - flag = unicodehelper._get_flag(res.decode("utf8")) + res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, + len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: @@ -695,12 +694,11 @@ unicode_name_handler = state.get_unicodedata_handler(space) - result, consumed, lgt = unicodehelper.str_decode_unicode_escape( + result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( string, len(string), errors, final, state.decode_error_handler, unicode_name_handler) - flag = unicodehelper._get_flag(result.decode('utf8')) return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) # ____________________________________________________________ From pypy.commits at gmail.com Sat Nov 4 18:17:08 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:08 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: add assertions for now Message-ID: <59fe3c64.87c7df0a.df252.a6c0@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92941:4bd78617a41a Date: 2017-11-04 20:37 +0100 http://bitbucket.org/pypy/pypy/changeset/4bd78617a41a/ Log: add assertions for now diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -48,6 +48,9 @@ else: assert flag == rutf8.FLAG_REGULAR self._index_storage = rutf8.null_storage() + lgt, flag_check = rutf8.check_utf8(utf8str, True) + assert lgt == length + assert flag == flag_check # the storage can be one of: # - null, unicode with no surrogates # - rutf8.UTF8_HAS_SURROGATES From pypy.commits at gmail.com Sat Nov 4 18:17:09 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:09 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: update TODO Message-ID: <59fe3c65.8faedf0a.fc527.38cf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92942:ec7d2032eb70 Date: 2017-11-04 20:38 +0100 http://bitbucket.org/pypy/pypy/changeset/ec7d2032eb70/ Log: update TODO diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -5,7 +5,5 @@ * fix cpyext * write the correct jit_elidable in _get_index_storage * better flag handling in split/splitlines maybe? 
-* find all the fast-paths that we want to do with utf8 (we only do - utf-8 now, not UTF8 or utf8) for decode/encode * encode_error_handler has XXX -* reenable list strategies for ascii-only unicode +* remove assertions from W_UnicodeObject.__init__ if all the builders pass From pypy.commits at gmail.com Sat Nov 4 18:17:05 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:05 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: fight until the strategies seem to work again for ascii unicode strings at least Message-ID: <59fe3c61.28361c0a.a0b50.ac26@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92940:1645f5285398 Date: 2017-11-04 20:32 +0100 http://bitbucket.org/pypy/pypy/changeset/1645f5285398/ Log: fight until the strategies seem to work again for ascii unicode strings at least diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -3,7 +3,7 @@ from rpython.rlib.cache import Cache from rpython.tool.uid import HUGEVAL_BYTES -from rpython.rlib import jit, types +from rpython.rlib import jit, types, rutf8 from rpython.rlib.debug import make_sure_not_resized from rpython.rlib.objectmodel import (we_are_translated, newlist_hint, compute_unique_id, specialize, not_rpython) @@ -1084,8 +1084,12 @@ def newlist_bytes(self, list_s): return self.newlist([self.newbytes(s) for s in list_s]) - def newlist_unicode(self, list_u): - return self.newlist([self.newunicode(u) for u in list_u]) + def newlist_utf8(self, list_u, is_ascii): + l_w = [None] * len(list_u) + for i, item in enumerate(list_u): + length, flag = rutf8.check_utf8(item, True) + l_w[i] = self.newutf8(item, length, flag) + return self.newlist(l_w) def newlist_int(self, list_i): return self.newlist([self.newint(i) for i in list_i]) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1,6 +1,6 @@ """The builtin dict implementation""" -from rpython.rlib import jit, rerased, objectmodel +from rpython.rlib import jit, rerased, objectmodel, rutf8 from rpython.rlib.debug import mark_dict_non_null from rpython.rlib.objectmodel import newlist_hint, r_dict, specialize from rpython.tool.sourcetools import func_renamer, func_with_new_name @@ -441,7 +441,7 @@ popitem delitem clear \ length w_keys values items \ iterkeys itervalues iteritems \ - listview_bytes listview_unicode listview_int \ + listview_bytes listview_utf8 listview_int \ view_as_kwargs".split() def make_method(method): @@ -593,7 +593,7 @@ def listview_bytes(self, w_dict): return None - def listview_unicode(self, w_dict): + def listview_utf8(self, w_dict): return None def listview_int(self, w_dict): @@ -640,7 +640,7 @@ if type(w_key) is self.space.StringObjectCls: self.switch_to_bytes_strategy(w_dict) return - elif type(w_key) is self.space.UnicodeObjectCls: + elif type(w_key) is self.space.UnicodeObjectCls and w_key.is_ascii(): self.switch_to_unicode_strategy(w_dict) return w_type = self.space.type(w_key) @@ -1197,14 +1197,14 @@ unerase = staticmethod(unerase) def wrap(self, unwrapped): - return self.space.newunicode(unwrapped) + return self.space.newutf8(unwrapped, len(unwrapped), rutf8.FLAG_ASCII) def unwrap(self, wrapped): - return self.space.unicode_w(wrapped) + return self.space.utf8_w(wrapped) def is_correct_type(self, w_obj): space = self.space - return space.is_w(space.type(w_obj), space.w_unicode) + return 
type(w_obj) is space.UnicodeObjectCls and w_obj.is_ascii() def get_empty_storage(self): res = {} @@ -1232,14 +1232,14 @@ ## assert key is not None ## return self.unerase(w_dict.dstorage).get(key, None) - def listview_unicode(self, w_dict): + def listview_utf8(self, w_dict): return self.unerase(w_dict.dstorage).keys() ## def w_keys(self, w_dict): ## return self.space.newlist_bytes(self.listview_bytes(w_dict)) def wrapkey(space, key): - return space.newunicode(key) + return space.newutf8(key, len(key), rutf8.FLAG_ASCII) ## @jit.look_inside_iff(lambda self, w_dict: ## w_dict_unrolling_heuristic(w_dict)) diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -196,7 +196,7 @@ return W_ListObject.from_storage_and_strategy(space, storage, strategy) @staticmethod - def newlist_unicode(space, list_u): + def newlist_utf8(space, list_u): strategy = space.fromcache(UnicodeListStrategy) storage = strategy.erase(list_u) return W_ListObject.from_storage_and_strategy(space, storage, strategy) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -309,19 +309,10 @@ newlist_text = newlist_bytes - def newlist_unicode(self, list_u): - xxx - return self.newlist(list_u) - return W_ListObject.newlist_unicode(self, list_u) - - def newlist_utf8(self, lst): - res_w = [] - for utf in lst: - assert utf is not None - assert isinstance(utf, str) - length, flag = rutf8.check_utf8(utf, allow_surrogates=True) - res_w.append(self.newutf8(utf, length, flag)) - return self.newlist(res_w) + def newlist_utf8(self, list_u, is_ascii): + if is_ascii: + return W_ListObject.newlist_utf8(self, list_u) + return ObjSpace.newlist_utf8(self, list_u, False) def newlist_int(self, list_i): return W_ListObject.newlist_int(self, list_i) @@ -515,9 +506,9 @@ if type(w_obj) is W_ListObject: return w_obj.getitems_utf8() if type(w_obj) is W_DictObject: - return w_obj.listview_unicode() + return w_obj.listview_utf8() if type(w_obj) is W_SetObject or type(w_obj) is W_FrozensetObject: - return w_obj.listview_unicode() + return w_obj.listview_utf8() if (isinstance(w_obj, W_UnicodeObject) and self._uni_uses_no_iter(w_obj) and w_obj.is_ascii()): return w_obj.listview_utf8() diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -12,7 +12,7 @@ from rpython.rlib.objectmodel import iterkeys_with_hash, contains_with_hash from rpython.rlib.objectmodel import setitem_with_hash, delitem_with_hash from rpython.rlib.rarithmetic import intmask, r_uint -from rpython.rlib import rerased, jit +from rpython.rlib import rerased, jit, rutf8 UNROLL_CUTOFF = 5 @@ -86,9 +86,9 @@ """ If this is a string set return its contents as a list of uwnrapped strings. Otherwise return None. """ return self.strategy.listview_bytes(self) - def listview_unicode(self): + def listview_utf8(self): """ If this is a unicode set return its contents as a list of uwnrapped unicodes. Otherwise return None. """ - return self.strategy.listview_unicode(self) + return self.strategy.listview_utf8(self) def listview_int(self): """ If this is an int set return its contents as a list of uwnrapped ints. Otherwise return None. 
""" @@ -690,7 +690,7 @@ def listview_bytes(self, w_set): return None - def listview_unicode(self, w_set): + def listview_utf8(self, w_set): return None def listview_int(self, w_set): @@ -795,8 +795,8 @@ strategy = self.space.fromcache(IntegerSetStrategy) elif type(w_key) is W_BytesObject: strategy = self.space.fromcache(BytesSetStrategy) - #elif type(w_key) is W_UnicodeObject: - # strategy = self.space.fromcache(UnicodeSetStrategy) + elif type(w_key) is W_UnicodeObject and w_key.is_ascii(): + strategy = self.space.fromcache(UnicodeSetStrategy) elif self.space.type(w_key).compares_by_identity(): strategy = self.space.fromcache(IdentitySetStrategy) else: @@ -1272,11 +1272,11 @@ def get_empty_dict(self): return {} - def listview_unicode(self, w_set): + def listview_utf8(self, w_set): return self.unerase(w_set.sstorage).keys() def is_correct_type(self, w_key): - return type(w_key) is W_UnicodeObject + return type(w_key) is W_UnicodeObject and w_key.is_ascii() def may_contain_equal_elements(self, strategy): if strategy is self.space.fromcache(IntegerSetStrategy): @@ -1495,7 +1495,7 @@ def next_entry(self): for key in self.iterator: - return self.space.newunicode(key) + return self.space.newutf8(key, len(key), rutf8.FLAG_ASCII) else: return None @@ -1636,13 +1636,13 @@ return # check for unicode - #for w_item in iterable_w: - # if type(w_item) is not W_UnicodeObject: - # break - #else: - # w_set.strategy = space.fromcache(UnicodeSetStrategy) - # w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w) - # return + for w_item in iterable_w: + if type(w_item) is not W_UnicodeObject or not w_item.is_ascii(): + break + else: + w_set.strategy = space.fromcache(UnicodeSetStrategy) + w_set.sstorage = w_set.strategy.get_storage_from_list(iterable_w) + return # check for compares by identity for w_item in iterable_w: diff --git a/pypy/objspace/std/test/test_dictmultiobject.py b/pypy/objspace/std/test/test_dictmultiobject.py --- a/pypy/objspace/std/test/test_dictmultiobject.py +++ b/pypy/objspace/std/test/test_dictmultiobject.py @@ -142,11 +142,10 @@ assert self.space.listview_bytes(w_d) == ["a", "b"] def test_listview_unicode_dict(self): - py.test.skip("listview_unicode disabled") w = self.space.wrap w_d = self.space.newdict() w_d.initialize_content([(w(u"a"), w(1)), (w(u"b"), w(2))]) - assert self.space.listview_unicode(w_d) == [u"a", u"b"] + assert self.space.listview_utf8(w_d) == ["a", "b"] def test_listview_int_dict(self): w = self.space.wrap diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -675,10 +675,10 @@ w_l4 = space.call_method(w_u, "rsplit", space.wrap(" ")) finally: del space.newlist - assert space.listview_unicode(w_l) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l2) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l3) == [u"a", u"b", u"c"] - assert space.listview_unicode(w_l4) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l2) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l3) == [u"a", u"b", u"c"] + assert space.listview_utf8(w_l4) == [u"a", u"b", u"c"] def test_pop_without_argument_is_fast(self): space = self.space @@ -722,7 +722,7 @@ def test_listview_unicode_list(self): space = self.space w_l = W_ListObject(space, [space.wrap(u"a"), space.wrap(u"b")]) - assert self.space.listview_unicode(w_l) == [u"a", u"b"] + assert 
self.space.listview_utf8(w_l) == [u"a", u"b"] def test_listview_int_list(self): space = self.space diff --git a/pypy/objspace/std/test/test_setstrategies.py b/pypy/objspace/std/test/test_setstrategies.py --- a/pypy/objspace/std/test/test_setstrategies.py +++ b/pypy/objspace/std/test/test_setstrategies.py @@ -42,7 +42,6 @@ assert s1.strategy is self.space.fromcache(ObjectSetStrategy) def test_switch_to_unicode(self): - py.test.skip("disabled") s = W_SetObject(self.space, self.wrapped([])) s.add(self.space.wrap(u"six")) assert s.strategy is self.space.fromcache(UnicodeSetStrategy) diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -28,7 +28,7 @@ def test_listview_unicode(self): w_str = self.space.newutf8('abcd', 4, rutf8.FLAG_ASCII) - assert self.space.listview_unicode(w_str) == list(u"abcd") + assert self.space.listview_utf8(w_str) == list("abcd") def test_new_shortcut(self): space = self.space diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -196,10 +196,6 @@ def _islinebreak(self, s, pos): return rutf8.islinebreak(s, pos) - def _newlist_unwrapped(self, space, lst): - assert False, "should not be called" - return space.newlist_unicode(lst) - @staticmethod @unwrap_spec(w_string=WrappedDefault("")) def descr_new(space, w_unicodetype, w_string, w_encoding=None, @@ -503,11 +499,11 @@ _StringMethods_descr_join = descr_join def descr_join(self, space, w_list): l = space.listview_utf8(w_list) - if l is not None: - xxxx + if l is not None and self.is_ascii(): if len(l) == 1: - return space.newunicode(l[0]) - return space.newunicode(self._utf8).join(l) + return space.newutf8(l[0], len(l[0]), rutf8.FLAG_ASCII) + s = self._utf8.join(l) + return space.newutf8(s, len(s), rutf8.FLAG_ASCII) return self._StringMethods_descr_join(space, w_list) def _join_return_one(self, space, w_obj): @@ -755,14 +751,14 @@ value = self._utf8 if space.is_none(w_sep): res = split(value, maxsplit=maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) by = self.convert_arg_to_w_unicode(space, w_sep)._utf8 if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = split(value, by, maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) @unwrap_spec(maxsplit=int) def descr_rsplit(self, space, w_sep=None, maxsplit=-1): @@ -770,14 +766,14 @@ value = self._utf8 if space.is_none(w_sep): res = rsplit(value, maxsplit=maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) by = self.convert_arg_to_w_unicode(space, w_sep)._utf8 if len(by) == 0: raise oefmt(space.w_ValueError, "empty separator") res = rsplit(value, by, maxsplit, isutf8=True) - return space.newlist_utf8(res) + return space.newlist_utf8(res, self.is_ascii()) def descr_getitem(self, space, w_index): if isinstance(w_index, W_SliceObject): From pypy.commits at gmail.com Sat Nov 4 18:17:11 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 04 Nov 2017 15:17:11 -0700 (PDT) Subject: [pypy-commit] pypy unicode-utf8: ups, fix Message-ID: <59fe3c67.0e781c0a.320bc.b089@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r92943:10e8aaa42286 Date: 2017-11-04 20:46 +0100 http://bitbucket.org/pypy/pypy/changeset/10e8aaa42286/ Log: 
ups, fix diff --git a/pypy/objspace/std/setobject.py b/pypy/objspace/std/setobject.py --- a/pypy/objspace/std/setobject.py +++ b/pypy/objspace/std/setobject.py @@ -1288,10 +1288,10 @@ return True def unwrap(self, w_item): - return self.space.unicode_w(w_item) + return self.space.utf8_w(w_item) def wrap(self, item): - return self.space.newunicode(item) + return self.space.newutf8(item, len(item), rutf8.FLAG_ASCII) def iter(self, w_set): return UnicodeIteratorImplementation(self.space, self, w_set) From pypy.commits at gmail.com Sun Nov 5 05:34:05 2017 From: pypy.commits at gmail.com (antocuni) Date: Sun, 05 Nov 2017 02:34:05 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: try to install vmprof and see what happens to test_enable and test_native Message-ID: <59fee91d.759adf0a.8ffa5.193e@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92944:4134cbd25c42 Date: 2017-11-05 11:33 +0100 http://bitbucket.org/pypy/pypy/changeset/4134cbd25c42/ Log: try to install vmprof and see what happens to test_enable and test_native diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis From pypy.commits at gmail.com Sun Nov 5 07:33:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 04:33:17 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix rpython/memory/ tests Message-ID: <59ff050d.1bb3df0a.b3075.10c0@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92945:b7b55dee74c6 Date: 2016-11-29 21:41 +0000 http://bitbucket.org/pypy/pypy/changeset/b7b55dee74c6/ Log: Fix rpython/memory/ tests diff --git a/rpython/memory/test/test_hybrid_gc.py b/rpython/memory/test/test_hybrid_gc.py --- a/rpython/memory/test/test_hybrid_gc.py +++ b/rpython/memory/test/test_hybrid_gc.py @@ -2,6 +2,7 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop +from rpython.rlib.objectmodel import assert_ from rpython.memory.test import test_generational_gc @@ -35,12 +36,12 @@ while i < x: gc.collect() i += 1 - assert ref() is a - assert ref().x == 42 + assert_(ref() is a) + assert_(ref().x == 42) return ref def step2(ref): gc.collect() # 'a' is freed here - assert ref() is None + assert_(ref() is None) def f(x): ref = step1(x) step2(ref) diff --git a/rpython/memory/test/test_incminimark_gc.py b/rpython/memory/test/test_incminimark_gc.py --- a/rpython/memory/test/test_incminimark_gc.py +++ b/rpython/memory/test/test_incminimark_gc.py @@ -2,6 +2,7 @@ from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rlib import rgc +from rpython.rlib.objectmodel import assert_ from rpython.memory.test import test_minimark_gc @@ -21,8 +22,8 @@ a.x = 5 wr = weakref.ref(a) llop.gc__collect(lltype.Void) # make everything old - assert wr() is not None - assert a.x == 5 + assert_(wr() is not None) + assert_(a.x == 5) return wr def f(): ref = g() @@ -31,7 +32,7 @@ # to an object not found, but still reachable: b = ref() llop.debug_print(lltype.Void, b) - assert b is not None + assert_(b is not None) llop.gc__collect(lltype.Void) # finish the major cycle # assert does not crash, because 'b' is still kept alive b.x = 42 @@ -46,7 +47,7 @@ def f(): a = A() ref = weakref.ref(a) - assert not rgc.pin(ref) + assert_(not rgc.pin(ref)) self.interpret(f, []) def 
test_pin_finalizer_not_implemented(self): @@ -63,8 +64,8 @@ def f(): a = A() b = B() - assert not rgc.pin(a) - assert not rgc.pin(b) + assert_(not rgc.pin(a)) + assert_(not rgc.pin(b)) self.interpret(f, []) def test_weakref_to_pinned(self): @@ -75,18 +76,18 @@ pass def g(): a = A() - assert rgc.pin(a) + assert_(rgc.pin(a)) a.x = 100 wr = weakref.ref(a) llop.gc__collect(lltype.Void) - assert wr() is not None - assert a.x == 100 + assert_(wr() is not None) + assert_(a.x == 100) return wr def f(): ref = g() llop.gc__collect(lltype.Void, 1) b = ref() - assert b is not None + assert_(b is not None) b.x = 101 return ref() is b res = self.interpret(f, []) diff --git a/rpython/memory/test/test_transformed_gc.py b/rpython/memory/test/test_transformed_gc.py --- a/rpython/memory/test/test_transformed_gc.py +++ b/rpython/memory/test/test_transformed_gc.py @@ -15,6 +15,7 @@ from rpython.rlib.rstring import StringBuilder from rpython.rlib.rarithmetic import LONG_BIT from rpython.rtyper.rtyper import llinterp_backend +from rpython.rlib.objectmodel import assert_ WORD = LONG_BIT // 8 @@ -804,7 +805,7 @@ [A() for i in range(20)] i = 0 while i < len(alist): - assert idarray[i] == compute_unique_id(alist[i]) + assert_(idarray[i] == compute_unique_id(alist[i])) i += 1 j += 1 lltype.free(idarray, flavor='raw') @@ -855,7 +856,7 @@ if cls.gcname == 'incminimark': marker = cls.marker def cleanup(): - assert marker[0] > 0 + assert_(marker[0] > 0) marker[0] = 0 else: cleanup = None @@ -987,7 +988,7 @@ for i in range(20): x.append((1, lltype.malloc(S))) for i in range(50): - assert l2[i] == l[50 + i] + assert_(l2[i] == l[50 + i]) return 0 return fn @@ -1036,9 +1037,9 @@ while i < x: all[i] = [i] * i i += 1 - assert ref() is a + assert_(ref() is a) llop.gc__collect(lltype.Void) - assert ref() is a + assert_(ref() is a) return a.foo + len(all) return f @@ -1115,7 +1116,7 @@ i = 0 while i < 17: ref = weakref.ref(a) - assert ref() is a + assert_(ref() is a) i += 1 return 0 @@ -1182,9 +1183,9 @@ a1 = A() nf1 = nf_a.address[0] nt1 = nt_a.address[0] - assert nf1 > nf0 - assert nt1 > nf1 - assert nt1 == nt0 + assert_(nf1 > nf0) + assert_(nt1 > nf1) + assert_(nt1 == nt0) return 0 return f @@ -1359,7 +1360,7 @@ hashes.append(compute_identity_hash(obj)) unique = {} for i in range(len(objects)): - assert compute_identity_hash(objects[i]) == hashes[i] + assert_(compute_identity_hash(objects[i]) == hashes[i]) unique[hashes[i]] = None return len(unique) return fn diff --git a/rpython/rtyper/test/test_exception.py b/rpython/rtyper/test/test_exception.py --- a/rpython/rtyper/test/test_exception.py +++ b/rpython/rtyper/test/test_exception.py @@ -49,39 +49,39 @@ try: g(n) except IOError as e: - assert e.errno == 0 - assert e.strerror == "test" - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror == "test") + assert_(e.filename is None) else: - assert False + assert_(False) try: h(n) except OSError as e: - assert e.errno == 42 - assert e.strerror == "?" - assert e.filename is None + assert_(e.errno == 42) + assert_(e.strerror == "?") + assert_(e.filename is None) else: - assert False + assert_(False) try: i(n) except EnvironmentError as e: - assert e.errno == 42 - assert e.strerror == "?" 
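            # Note added in editing (not part of the committed patch): across
            # the assert-rewrite branch, plain `assert` statements in tests
            # that also run translated are mechanically replaced by the
            # assert_() helper from rpython.rlib.objectmodel (the surrounding
            # hunks add the import), so the checks stay ordinary function
            # calls rather than assert statements, which the RPython toolchain
            # can treat specially.  The helper's exact definition is not shown
            # in this digest; presumably it amounts to something like:
            #     def assert_(condition, msg="assertion failed"):
            #         if not condition:
            #             raise AssertionError(msg)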
- assert e.filename == "test" + assert_(e.errno == 42) + assert_(e.strerror == "?") + assert_(e.filename == "test") else: - assert False + assert_(False) try: j(n) except (IOError, OSError) as e: - assert e.errno == 0 - assert e.strerror == "test" - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror == "test") + assert_(e.filename is None) try: k(n) except EnvironmentError as e: - assert e.errno == 0 - assert e.strerror is None - assert e.filename is None + assert_(e.errno == 0) + assert_(e.strerror is None) + assert_(e.filename is None) self.interpret(f, [42]) def test_catch_incompatible_class(self): @@ -91,7 +91,7 @@ pass def f(n): try: - assert n < 10 + assert_(n < 10) except MyError as operr: h(operr) res = self.interpret(f, [7]) From pypy.commits at gmail.com Sun Nov 5 09:48:01 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 06:48:01 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix asserts in rpython/rtyper/ Message-ID: <59ff24a1.3bb0df0a.1515b.83b2@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92947:b994fd06043b Date: 2017-11-05 14:43 +0000 http://bitbucket.org/pypy/pypy/changeset/b994fd06043b/ Log: Fix asserts in rpython/rtyper/ diff --git a/rpython/rtyper/lltypesystem/test/test_llarena.py b/rpython/rtyper/lltypesystem/test/test_llarena.py --- a/rpython/rtyper/lltypesystem/test/test_llarena.py +++ b/rpython/rtyper/lltypesystem/test/test_llarena.py @@ -1,5 +1,6 @@ import py, os +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem import lltype, llmemory, rffi, llarena from rpython.rtyper.lltypesystem.llarena import (arena_malloc, arena_reset, arena_reserve, arena_free, round_up_for_allocation, ArenaError, @@ -143,12 +144,12 @@ b = a + round_up_for_allocation(llmemory.sizeof(lltype.Char)) arena_reserve(b, precomputed_size) (b + llmemory.offsetof(SX, 'x')).signed[0] = 123 - assert llmemory.cast_adr_to_ptr(b, SPTR).x == 123 + assert_(llmemory.cast_adr_to_ptr(b, SPTR).x == 123) llmemory.cast_adr_to_ptr(b, SPTR).x += 1 - assert (b + llmemory.offsetof(SX, 'x')).signed[0] == 124 + assert_((b + llmemory.offsetof(SX, 'x')).signed[0] == 124) arena_reset(a, myarenasize, True) arena_reserve(b, round_up_for_allocation(llmemory.sizeof(SX))) - assert llmemory.cast_adr_to_ptr(b, SPTR).x == 0 + assert_(llmemory.cast_adr_to_ptr(b, SPTR).x == 0) arena_free(a) return 42 @@ -334,7 +335,7 @@ arena_reserve(a, llmemory.sizeof(S)) p = llmemory.cast_adr_to_ptr(a + 23432, lltype.Ptr(S)) p.x = 123 - assert p.x == 123 + assert_(p.x == 123) arena_protect(a, 65536, True) result = 0 if testrun == 1: diff --git a/rpython/rtyper/lltypesystem/test/test_llgroup.py b/rpython/rtyper/lltypesystem/test/test_llgroup.py --- a/rpython/rtyper/lltypesystem/test/test_llgroup.py +++ b/rpython/rtyper/lltypesystem/test/test_llgroup.py @@ -1,3 +1,4 @@ +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.llgroup import * from rpython.rtyper.lltypesystem.lloperation import llop from rpython.rtyper.test.test_llinterp import interpret @@ -76,37 +77,37 @@ # def f(): p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, test.g1a) - assert p == test.p1a + assert_(p == test.p1a) p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, test.g1b) - assert p == test.p1b + assert_(p == test.p1b) p = llop.get_group_member(lltype.Ptr(test.S2), grpptr, test.g2a) - assert p == test.p2a + assert_(p == test.p2a) p = llop.get_group_member(lltype.Ptr(test.S2), grpptr, test.g2b) - assert p == test.p2b + assert_(p == test.p2b) # p 
= llop.get_next_group_member(lltype.Ptr(test.S2), grpptr, test.g1a, llmemory.sizeof(test.S1)) - assert p == test.p2a + assert_(p == test.p2a) p = llop.get_next_group_member(lltype.Ptr(test.S2), grpptr, test.g2a, llmemory.sizeof(test.S2)) - assert p == test.p2b + assert_(p == test.p2b) p = llop.get_next_group_member(lltype.Ptr(test.S1), grpptr, test.g2b, llmemory.sizeof(test.S2)) - assert p == test.p1b + assert_(p == test.p1b) # expected = [123, 456] for i in range(2): p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, g1x[i]) - assert p.x == expected[i] + assert_(p.x == expected[i]) # for i in range(2): s = llop.extract_ushort(HALFWORD, cslist[i]) p = llop.get_group_member(lltype.Ptr(test.S1), grpptr, s) - assert p == test.p1b - assert cslist[0] & ~MASK == 0x45 << HALFSHIFT - assert cslist[1] & ~MASK == 0x41 << HALFSHIFT - assert cslist[0] >> HALFSHIFT == 0x45 - assert cslist[1] >> (HALFSHIFT+1) == 0x41 >> 1 + assert_(p == test.p1b) + assert_(cslist[0] & ~MASK == 0x45 << HALFSHIFT) + assert_(cslist[1] & ~MASK == 0x41 << HALFSHIFT) + assert_(cslist[0] >> HALFSHIFT == 0x45) + assert_(cslist[1] >> (HALFSHIFT+1) == 0x41 >> 1) # return 42 return f diff --git a/rpython/rtyper/lltypesystem/test/test_llmemory.py b/rpython/rtyper/lltypesystem/test/test_llmemory.py --- a/rpython/rtyper/lltypesystem/test/test_llmemory.py +++ b/rpython/rtyper/lltypesystem/test/test_llmemory.py @@ -1,3 +1,4 @@ +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.llmemory import * from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.test.test_llinterp import interpret @@ -40,7 +41,7 @@ assert b.signed[0] == 123 b.signed[0] = 234 assert s2.s.x == 234 - + def test_array(): A = lltype.GcArray(lltype.Signed) x = lltype.malloc(A, 5) @@ -85,7 +86,7 @@ o = AddressOffset() py.test.raises(TypeError, "1 + o") py.test.raises(TypeError, "o + 1") - + def test_sizeof(): # this is mostly an "assert not raises" sort of test array = lltype.Array(lltype.Signed) @@ -421,7 +422,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + T = lltype.GcStruct('T', ('s', S)) adr = raw_malloc(sizeof(T)) p_s = cast_adr_to_ptr(adr, lltype.Ptr(S)) @@ -431,7 +432,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + U = lltype.Struct('U', ('y', lltype.Signed)) T = lltype.GcStruct('T', ('x', lltype.Signed), ('u', U)) adr = raw_malloc(sizeof(T)) @@ -446,10 +447,10 @@ def test_raw_free_with_hdr(): from rpython.memory.gcheader import GCHeaderBuilder - + HDR = lltype.Struct('h', ('t', lltype.Signed)) gh = GCHeaderBuilder(HDR).size_gc_header - + A = lltype.GcArray(lltype.Signed) adr = raw_malloc(gh+sizeof(A, 10)) p_a = cast_adr_to_ptr(adr+gh, lltype.Ptr(A)) @@ -471,7 +472,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + T = lltype.GcStruct('T', ('s', S)) adr = raw_malloc(gh+sizeof(T)) p_s = cast_adr_to_ptr(adr+gh, lltype.Ptr(S)) @@ -482,7 +483,7 @@ py.test.raises(RuntimeError, "p_s.x = 2") repr(adr) str(p_s) - + U = lltype.Struct('U', ('y', lltype.Signed)) T = lltype.GcStruct('T', ('x', lltype.Signed), ('u', U)) adr = raw_malloc(gh+sizeof(T)) @@ -656,6 +657,6 @@ ptr = lltype.malloc(A, 10) gcref = lltype.cast_opaque_ptr(GCREF, ptr) adr = lltype.cast_ptr_to_int(gcref) - assert adr == lltype.cast_ptr_to_int(ptr) + assert_(adr == lltype.cast_ptr_to_int(ptr)) f() interpret(f, []) diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ 
b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -1,6 +1,7 @@ import py import sys +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.rffi import * from rpython.rtyper.lltypesystem.rffi import _keeper_for_type # crap from rpython.rlib.rposix import get_saved_errno, set_saved_errno @@ -611,9 +612,9 @@ p1bis = make(X1) p2bis = make(X2) structcopy(p1bis, p1) - assert p1bis.a == 5 - assert p1bis.x2.x == 456 - assert p1bis.p == p2 + assert_(p1bis.a == 5) + assert_(p1bis.x2.x == 456) + assert_(p1bis.p == p2) structcopy(p2bis, p2) res = p2bis.x lltype.free(p2bis, flavor='raw') @@ -697,11 +698,11 @@ def f(): raw = str2charp("XxxZy") n = str2chararray("abcdef", raw, 4) - assert raw[0] == 'a' - assert raw[1] == 'b' - assert raw[2] == 'c' - assert raw[3] == 'd' - assert raw[4] == 'y' + assert_(raw[0] == 'a') + assert_(raw[1] == 'b') + assert_(raw[2] == 'c') + assert_(raw[3] == 'd') + assert_(raw[4] == 'y') lltype.free(raw, flavor='raw') return n @@ -796,9 +797,9 @@ for i in xrange(len(data)): a[i] = data[i] a2 = ptradd(a, 2) - assert lltype.typeOf(a2) == lltype.typeOf(a) == lltype.Ptr(ARRAY_OF_CHAR) + assert_(lltype.typeOf(a2) == lltype.typeOf(a) == lltype.Ptr(ARRAY_OF_CHAR)) for i in xrange(len(data) - 2): - assert a2[i] == a[i + 2] + assert_(a2[i] == a[i + 2]) lltype.free(a, flavor='raw') def test_ptradd_interpret(): diff --git a/rpython/rtyper/lltypesystem/test/test_ztranslated.py b/rpython/rtyper/lltypesystem/test/test_ztranslated.py --- a/rpython/rtyper/lltypesystem/test/test_ztranslated.py +++ b/rpython/rtyper/lltypesystem/test/test_ztranslated.py @@ -1,4 +1,5 @@ import gc +from rpython.rlib.objectmodel import assert_ from rpython.translator.c.test.test_genc import compile from rpython.rtyper.lltypesystem import rffi from rpython.rtyper.lltypesystem import lltype @@ -8,7 +9,7 @@ def debug_assert(boolresult, msg): if not boolresult: llop.debug_print(lltype.Void, "\n\nassert failed: %s\n\n" % msg) - assert boolresult + assert_(boolresult) def use_str(): mystr = b'abc' diff --git a/rpython/rtyper/test/test_exception.py b/rpython/rtyper/test/test_exception.py --- a/rpython/rtyper/test/test_exception.py +++ b/rpython/rtyper/test/test_exception.py @@ -1,5 +1,6 @@ import py +from rpython.rlib.objectmodel import assert_ from rpython.translator.translator import TranslationContext from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.llinterp import LLException diff --git a/rpython/rtyper/test/test_llann.py b/rpython/rtyper/test/test_llann.py --- a/rpython/rtyper/test/test_llann.py +++ b/rpython/rtyper/test/test_llann.py @@ -1,6 +1,7 @@ import py from rpython.annotator import model as annmodel +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.llannotation import SomePtr, lltype_to_annotation from rpython.conftest import option from rpython.rtyper.annlowlevel import (annotate_lowlevel_helper, @@ -456,7 +457,7 @@ s.y = y fptr = llhelper(F, f) gptr = llhelper(G, g) - assert typeOf(fptr) == F + assert_(typeOf(fptr) == F) return fptr(s, z)+fptr(s, z*2)+gptr(s) res = interpret(h, [8, 5, 2]) @@ -478,7 +479,7 @@ s.x = x s.y = y fptr = llhelper(F, myfuncs[z]) - assert typeOf(fptr) == F + assert_(typeOf(fptr) == F) return fptr(s) res = interpret(h, [80, 5, 0]) diff --git a/rpython/rtyper/test/test_llinterp.py b/rpython/rtyper/test/test_llinterp.py --- a/rpython/rtyper/test/test_llinterp.py +++ b/rpython/rtyper/test/test_llinterp.py @@ -1,6 +1,7 @@ -from __future__ import with_statement import py import sys + +from rpython.rlib.objectmodel import 
assert_ from rpython.rtyper.lltypesystem.lltype import typeOf, Void, malloc, free from rpython.rtyper.llinterp import LLInterpreter, LLException, log from rpython.rtyper.rmodel import inputconst @@ -571,7 +572,7 @@ with scoped_alloc(T, 1) as array: array[0] = -42 x = array[0] - assert x == -42 + assert_(x == -42) res = interpret(f, []) diff --git a/rpython/rtyper/test/test_nongc.py b/rpython/rtyper/test/test_nongc.py --- a/rpython/rtyper/test/test_nongc.py +++ b/rpython/rtyper/test/test_nongc.py @@ -1,10 +1,10 @@ import py +from rpython.rlib.objectmodel import assert_, free_non_gc_object from rpython.annotator import model as annmodel +from rpython.annotator.annrpython import RPythonAnnotator from rpython.rtyper.llannotation import SomeAddress -from rpython.annotator.annrpython import RPythonAnnotator from rpython.rtyper.rtyper import RPythonTyper -from rpython.rlib.objectmodel import free_non_gc_object from rpython.rtyper.test.test_llinterp import interpret as llinterpret def interpret(f, args): @@ -100,13 +100,13 @@ if i == 0: pass elif i == 1: - assert isinstance(o, A) + assert_(isinstance(o, A)) free_non_gc_object(o) elif i == 2: - assert isinstance(o, B) + assert_(isinstance(o, B)) free_non_gc_object(o) else: - assert isinstance(o, C) + assert_(isinstance(o, C)) free_non_gc_object(o) return res diff --git a/rpython/rtyper/test/test_rclass.py b/rpython/rtyper/test/test_rclass.py --- a/rpython/rtyper/test/test_rclass.py +++ b/rpython/rtyper/test/test_rclass.py @@ -4,6 +4,7 @@ from rpython.flowspace.model import summary from rpython.rlib.rarithmetic import r_longlong +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import (typeOf, Signed, getRuntimeTypeInfo, identityhash) from rpython.rtyper.error import TyperError @@ -1248,13 +1249,13 @@ self.data[i] = v def __getslice__(self, start, stop): - assert start >= 0 - assert stop >= 0 + assert_(start >= 0) + assert_(stop >= 0) return self.data[start:stop] def __setslice__(self, start, stop, v): - assert start >= 0 - assert stop >= 0 + assert_(start >= 0) + assert_(stop >= 0) i = 0 for n in range(start, stop): self.data[n] = v[i] diff --git a/rpython/rtyper/test/test_rdict.py b/rpython/rtyper/test/test_rdict.py --- a/rpython/rtyper/test/test_rdict.py +++ b/rpython/rtyper/test/test_rdict.py @@ -2,22 +2,24 @@ from contextlib import contextmanager import signal -from rpython.translator.translator import TranslationContext -from rpython.annotator.model import ( - SomeInteger, SomeString, SomeChar, SomeUnicodeString, SomeUnicodeCodePoint) -from rpython.annotator.dictdef import DictKey, DictValue -from rpython.rtyper.lltypesystem import lltype, rffi -from rpython.rtyper.lltypesystem import rdict -from rpython.rtyper.test.tool import BaseRtypingTest -from rpython.rlib.objectmodel import r_dict -from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong - import py from hypothesis import settings from hypothesis.strategies import ( builds, sampled_from, binary, just, integers, text, characters, tuples) from hypothesis.stateful import GenericStateMachine, run_state_machine_as_test +from rpython.translator.translator import TranslationContext +from rpython.annotator.model import ( + SomeInteger, SomeString, SomeChar, SomeUnicodeString, SomeUnicodeCodePoint) +from rpython.annotator.dictdef import DictKey, DictValue +from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong +from rpython.rlib.objectmodel import assert_ +from rpython.rtyper.lltypesystem import lltype, rffi +from 
rpython.rtyper.lltypesystem import rdict +from rpython.rtyper.test.tool import BaseRtypingTest +from rpython.rlib.objectmodel import r_dict + + def ann2strategy(s_value): if isinstance(s_value, SomeChar): return builds(chr, integers(min_value=0, max_value=255)) @@ -192,7 +194,7 @@ for value in d.itervalues(): k2 = k2 * value for key, value in d.iteritems(): - assert d[key] == value + assert_(d[key] == value) k3 = k3 * value return k1 + k2 + k3 res = self.interpret(func, []) @@ -702,15 +704,15 @@ d[5] = 2 d[6] = 3 k1, v1 = d.popitem() - assert len(d) == 1 + assert_(len(d) == 1) k2, v2 = d.popitem() try: d.popitem() except KeyError: pass else: - assert 0, "should have raised KeyError" - assert len(d) == 0 + assert_(0, "should have raised KeyError") + assert_(len(d) == 0) return k1*1000 + v1*100 + k2*10 + v2 res = self.interpret(func, []) @@ -960,15 +962,15 @@ d[5] = 2 d[6] = 3 k1, v1 = d.popitem() - assert len(d) == 1 + assert_(len(d) == 1) k2, v2 = d.popitem() try: d.popitem() except KeyError: pass else: - assert 0, "should have raised KeyError" - assert len(d) == 0 + assert_(0, "should have raised KeyError") + assert_(len(d) == 0) return k1*1000 + v1*100 + k2*10 + v2 res = self.interpret(func, []) diff --git a/rpython/rtyper/test/test_rint.py b/rpython/rtyper/test/test_rint.py --- a/rpython/rtyper/test/test_rint.py +++ b/rpython/rtyper/test/test_rint.py @@ -1,10 +1,12 @@ import py -import sys, operator +import sys +import operator + from rpython.translator.translator import TranslationContext +from rpython.rlib.objectmodel import assert_, compute_hash from rpython.rtyper.test import snippet from rpython.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong from rpython.rlib.rarithmetic import ovfcheck, r_int64, intmask, int_between -from rpython.rlib import objectmodel from rpython.rtyper.test.tool import BaseRtypingTest from rpython.flowspace.model import summary @@ -392,16 +394,16 @@ def test_int_py_div_nonnegargs(self): def f(x, y): - assert x >= 0 - assert y >= 0 + assert_(x >= 0) + assert_(y >= 0) return x // y res = self.interpret(f, [1234567, 123]) assert res == 1234567 // 123 def test_int_py_mod_nonnegargs(self): def f(x, y): - assert x >= 0 - assert y >= 0 + assert_(x >= 0) + assert_(y >= 0) return x % y res = self.interpret(f, [1234567, 123]) assert res == 1234567 % 123 @@ -418,7 +420,7 @@ def test_hash(self): def f(x): - return objectmodel.compute_hash(x) + return compute_hash(x) res = self.interpret(f, [123456789]) assert res == 123456789 res = self.interpret(f, [r_int64(123456789012345678)]) diff --git a/rpython/rtyper/test/test_rlist.py b/rpython/rtyper/test/test_rlist.py --- a/rpython/rtyper/test/test_rlist.py +++ b/rpython/rtyper/test/test_rlist.py @@ -3,11 +3,13 @@ import py +from rpython.rlib.objectmodel import assert_, newlist_hint, resizelist_hint from rpython.rtyper.debug import ll_assert from rpython.rtyper.error import TyperError from rpython.rtyper.llinterp import LLException, LLAssertFailure from rpython.rtyper.lltypesystem import rlist as ll_rlist -from rpython.rtyper.lltypesystem.rlist import ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist +from rpython.rtyper.lltypesystem.rlist import ( + ListRepr, FixedSizeListRepr, ll_newlist, ll_fixed_newlist) from rpython.rtyper.rint import signed_repr from rpython.rtyper.rlist import * from rpython.rtyper.test.tool import BaseRtypingTest @@ -959,7 +961,7 @@ x = l.pop() x = l.pop() x = l2.pop() - return str(x)+";"+str(l) + return str(x) + ";" + str(l) res = self.ll_to_string(self.interpret(fn, [])) res = 
res.replace('rpython.rtyper.test.test_rlist.', '') res = re.sub(' at 0x[a-z0-9]+', '', res) @@ -1167,7 +1169,7 @@ lst = [fr, fr] lst.append(fr) del lst[1] - assert lst[0] is fr + assert_(lst[0] is fr) return len(lst) res = self.interpret(f, []) assert res == 2 @@ -1202,9 +1204,9 @@ def test_list_equality(self): def dummyfn(n): lst = [12] * n - assert lst == [12, 12, 12] + assert_(lst == [12, 12, 12]) lst2 = [[12, 34], [5], [], [12, 12, 12], [5]] - assert lst in lst2 + assert_(lst in lst2) self.interpret(dummyfn, [3]) def test_list_remove(self): @@ -1215,7 +1217,6 @@ res = self.interpret(dummyfn, [1, 0]) assert res == 0 - def test_getitem_exc_1(self): def f(x): l = [1] @@ -1339,7 +1340,7 @@ def test_charlist_extension_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = ['a', 'b'] l += s[i:] return ''.join(l) @@ -1349,7 +1350,7 @@ def test_unicharlist_extension_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = [u'a', u'b'] l += s[i:] return ''.join([chr(ord(c)) for c in l]) @@ -1359,7 +1360,7 @@ def test_extend_a_non_char_list_2(self): def f(n, i): s = 'hello%d' % n - assert 0 <= i <= len(s) + assert_(0 <= i <= len(s)) l = ['foo', 'bar'] l += s[i:] # NOT SUPPORTED for now if l is not a list of chars return ''.join(l) @@ -1368,7 +1369,7 @@ def test_charlist_extension_3(self): def f(n, i, j): s = 'hello%d' % n - assert 0 <= i <= j <= len(s) + assert_(0 <= i <= j <= len(s)) l = ['a', 'b'] l += s[i:j] return ''.join(l) @@ -1378,7 +1379,7 @@ def test_unicharlist_extension_3(self): def f(n, i, j): s = 'hello%d' % n - assert 0 <= i <= j <= len(s) + assert_(0 <= i <= j <= len(s)) l = [u'a', u'b'] l += s[i:j] return ''.join([chr(ord(c)) for c in l]) @@ -1491,8 +1492,6 @@ ("y[*]" in immutable_fields) def test_hints(self): - from rpython.rlib.objectmodel import newlist_hint - strings = ['abc', 'def'] def f(i): z = strings[i] @@ -1569,8 +1568,8 @@ def test_no_unneeded_refs(self): def fndel(p, q): lis = ["5", "3", "99"] - assert q >= 0 - assert p >= 0 + assert_(q >= 0) + assert_(p >= 0) del lis[p:q] return lis def fnpop(n): @@ -1677,7 +1676,6 @@ def test_extend_was_not_overallocating(self): from rpython.rlib import rgc - from rpython.rlib.objectmodel import resizelist_hint from rpython.rtyper.lltypesystem import lltype old_arraycopy = rgc.ll_arraycopy try: diff --git a/rpython/rtyper/test/test_rordereddict.py b/rpython/rtyper/test/test_rordereddict.py --- a/rpython/rtyper/test/test_rordereddict.py +++ b/rpython/rtyper/test/test_rordereddict.py @@ -1,10 +1,10 @@ import py -import random from collections import OrderedDict from hypothesis import settings, given, strategies from hypothesis.stateful import run_state_machine_as_test +from rpython.rlib.objectmodel import assert_, r_ordereddict from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem import rordereddict, rstr from rpython.rlib.rarithmetic import intmask @@ -387,7 +387,7 @@ @staticmethod def new_r_dict(myeq, myhash): - return objectmodel.r_ordereddict(myeq, myhash) + return r_ordereddict(myeq, myhash) def test_two_dicts_with_different_value_types(self): def func(i): @@ -406,14 +406,14 @@ d1['key2'] = 'value2' for i in range(20): objectmodel.move_to_end(d1, 'key1') - assert d1.keys() == ['key2', 'key1'] + assert_(d1.keys() == ['key2', 'key1']) objectmodel.move_to_end(d1, 'key2') - assert d1.keys() == ['key1', 'key2'] + assert_(d1.keys() == ['key1', 'key2']) for i in range(20): objectmodel.move_to_end(d1, 'key2', last=False) - 
assert d1.keys() == ['key2', 'key1'] + assert_(d1.keys() == ['key2', 'key1']) objectmodel.move_to_end(d1, 'key1', last=False) - assert d1.keys() == ['key1', 'key2'] + assert_(d1.keys() == ['key1', 'key2']) func() self.interpret(func, []) diff --git a/rpython/rtyper/test/test_rpbc.py b/rpython/rtyper/test/test_rpbc.py --- a/rpython/rtyper/test/test_rpbc.py +++ b/rpython/rtyper/test/test_rpbc.py @@ -2,6 +2,7 @@ from rpython.annotator import model as annmodel from rpython.annotator import specialize +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import typeOf from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.llannotation import SomePtr, lltype_to_annotation @@ -1604,7 +1605,7 @@ try: o.m() except KeyError: - assert 0 + raise ValueError return B().m() self.interpret_raises(KeyError, f, [7]) @@ -1717,7 +1718,7 @@ def cb2(): pass def g(cb, result): - assert (cb is None) == (result == 0) + assert_((cb is None) == (result == 0)) def h(cb): cb() def f(): diff --git a/rpython/rtyper/test/test_rptr.py b/rpython/rtyper/test/test_rptr.py --- a/rpython/rtyper/test/test_rptr.py +++ b/rpython/rtyper/test/test_rptr.py @@ -6,9 +6,11 @@ from rpython.rtyper.llannotation import SomePtr from rpython.annotator.annrpython import RPythonAnnotator from rpython.rlib.rarithmetic import is_valid_int +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.annlowlevel import annotate_lowlevel_helper, LowLevelAnnotatorPolicy from rpython.rtyper.lltypesystem import llmemory, lltype from rpython.rtyper.rtyper import RPythonTyper +from rpython.rtyper.test.test_llinterp import interpret # ____________________________________________________________ @@ -50,7 +52,6 @@ assert s == annmodel.SomeTuple([SomePtr(lltype.Ptr(lltype.RuntimeTypeInfo)), annmodel.SomeBool()]) -from rpython.rtyper.test.test_llinterp import interpret, gengraph def test_adtmeths(): policy = LowLevelAnnotatorPolicy() @@ -86,7 +87,6 @@ assert lltype.typeOf(a) == lltype.Ptr(A) assert len(a) == 10 - def f(): a = A.h_alloc(10) return a.h_length() @@ -104,15 +104,15 @@ S = lltype.GcStruct('S', ('t', T)) PT = lltype.Ptr(T) PS = lltype.Ptr(S) + def fn(n): s = lltype.cast_int_to_ptr(PS, n) - assert lltype.typeOf(s) == PS - assert lltype.cast_ptr_to_int(s) == n + assert_(lltype.typeOf(s) == PS) + assert_(lltype.cast_ptr_to_int(s) == n) t = lltype.cast_pointer(PT, s) - assert lltype.typeOf(t) == PT - assert lltype.cast_ptr_to_int(t) == n - assert s == lltype.cast_pointer(PS, t) - + assert_(lltype.typeOf(t) == PT) + assert_(lltype.cast_ptr_to_int(t) == n) + assert_(s == lltype.cast_pointer(PS, t)) interpret(fn, [11521]) def test_odd_ints_opaque(): @@ -120,12 +120,13 @@ Q = lltype.GcOpaqueType('Q') PT = lltype.Ptr(T) PQ = lltype.Ptr(Q) + def fn(n): t = lltype.cast_int_to_ptr(PT, n) - assert lltype.typeOf(t) == PT - assert lltype.cast_ptr_to_int(t) == n + assert_(lltype.typeOf(t) == PT) + assert_(lltype.cast_ptr_to_int(t) == n) o = lltype.cast_opaque_ptr(PQ, t) - assert lltype.cast_ptr_to_int(o) == n + assert_(lltype.cast_ptr_to_int(o) == n) fn(13) interpret(fn, [11521]) @@ -384,6 +385,7 @@ def test_interior_ptr_with_setitem(): T = lltype.GcStruct("T", ('s', lltype.Array(lltype.Signed))) + def f(): t = lltype.malloc(T, 1) t.s[0] = 1 @@ -393,18 +395,21 @@ def test_isinstance_ptr(): S = lltype.GcStruct("S", ('x', lltype.Signed)) + def f(n): x = isinstance(lltype.Signed, lltype.Ptr) return x + (lltype.typeOf(x) is lltype.Ptr(S)) + len(n) + def lltest(): f([]) return f([1]) s, t = ll_rtype(lltest, []) - 
assert s.is_constant() == False + assert s.is_constant() is False def test_staticadtmeths(): ll_func = lltype.staticAdtMethod(lambda x: x + 42) S = lltype.GcStruct('S', adtmeths={'ll_func': ll_func}) + def f(): return lltype.malloc(S).ll_func(5) s, t = ll_rtype(f, []) diff --git a/rpython/rtyper/test/test_rstr.py b/rpython/rtyper/test/test_rstr.py --- a/rpython/rtyper/test/test_rstr.py +++ b/rpython/rtyper/test/test_rstr.py @@ -4,6 +4,7 @@ from rpython.flowspace.model import summary from rpython.annotator.model import AnnotatorError +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem.lltype import typeOf, Signed, malloc from rpython.rtyper.lltypesystem.rstr import LLHelpers, STR from rpython.rtyper.rstr import AbstractLLHelpers @@ -357,20 +358,24 @@ def test_find_with_start(self): const = self.const + def fn(i): - assert i >= 0 + assert_(i >= 0) return const('ababcabc').find(const('abc'), i) + for i in range(9): res = self.interpret(fn, [i]) assert res == fn(i) def test_find_with_start_end(self): const = self.const + def fn(i, j): - assert i >= 0 - assert j >= 0 + assert_(i >= 0) + assert_(j >= 0) return (const('ababcabc').find(const('abc'), i, j) + const('ababcabc').find(const('b'), i, j) * 100) + for (i, j) in [(1,7), (2,6), (3,7), (3,8), (4,99), (7, 99)]: res = self.interpret(fn, [i, j]) assert res == fn(i, j) @@ -388,14 +393,16 @@ def test_find_empty_string(self): const = self.const + def f(i): - assert i >= 0 + assert_(i >= 0) s = const("abc") x = s.find(const('')) x+= s.find(const(''), i)*10 x+= s.find(const(''), i, i)*100 x+= s.find(const(''), i, i+1)*1000 return x + for i, expected in enumerate([0, 1110, 2220, 3330, -1110, -1110]): res = self.interpret(f, [i]) assert res == expected @@ -418,14 +425,16 @@ def test_rfind_empty_string(self): const = self.const + def f(i): - assert i >= 0 + assert_(i >= 0) s = const("abc") x = s.rfind(const('')) x+= s.rfind(const(''), i)*10 x+= s.rfind(const(''), i, i)*100 x+= s.rfind(const(''), i, i+1)*1000 return x + for i, expected in enumerate([1033, 2133, 3233, 3333, 3-1110, 3-1110]): res = self.interpret(f, [i]) assert res == expected @@ -557,7 +566,7 @@ def fn(i): c = ["a", "b", "c"] - assert i >= 0 + assert_(i >= 0) return const('').join(c[i:]) res = self.interpret(fn, [0]) assert self.ll_to_string(res) == const("abc") diff --git a/rpython/rtyper/test/test_rtuple.py b/rpython/rtyper/test/test_rtuple.py --- a/rpython/rtyper/test/test_rtuple.py +++ b/rpython/rtyper/test/test_rtuple.py @@ -1,11 +1,11 @@ import py +from rpython.rlib.objectmodel import assert_, compute_hash from rpython.rtyper.rtuple import TUPLE_TYPE, TupleRepr from rpython.rtyper.lltypesystem.lltype import Signed, Bool from rpython.rtyper.rbool import bool_repr from rpython.rtyper.rint import signed_repr from rpython.rtyper.test.tool import BaseRtypingTest from rpython.rtyper.error import TyperError -from rpython.rlib.objectmodel import compute_hash from rpython.translator.translator import TranslationContext @@ -290,7 +290,7 @@ res = [] for x in lst: res.append(list(x)) - assert res[0] == res[1] == res[2] == [] + assert_(res[0] == res[1] == res[2] == []) self.interpret(f, []) def test_slice(self): @@ -299,14 +299,14 @@ return t[1:] + t[:-1] + t[12:] + t[0:2] def f(n): res = g(n) - assert len(res) == 6 - assert res[0] == "hello" - assert res[1] == n - assert res[2] == 1.5 - assert res[3] == "hello" - assert res[4] == 1.5 - assert res[5] == "hello" - self.interpret(f, [9]) + assert_(len(res) == 6) + assert_(res[0] == "hello") + assert_(res[1] == n) + 
assert_(res[2] == 1.5) + assert_(res[3] == "hello") + assert_(res[4] == 1.5) + assert_(res[5] == "hello") + res = self.interpret(f, [9]) def test_tuple_eq(self): def f(n): @@ -350,8 +350,8 @@ def test_tuple_str(self): def f(n): - assert str(()) == "()" - assert str((n,)) == "(%d,)" % n - assert str((n, 6)) == "(%d, 6)" % n - assert str(((n,),)) == "((%d,),)" % n + assert_(str(()) == "()") + assert_(str((n,)) == "(%d,)" % n) + assert_(str((n, 6)) == "(%d, 6)" % n) + assert_(str(((n,),)) == "((%d,),)" % n) self.interpret(f, [3]) diff --git a/rpython/rtyper/test/test_rweakref.py b/rpython/rtyper/test/test_rweakref.py --- a/rpython/rtyper/test/test_rweakref.py +++ b/rpython/rtyper/test/test_rweakref.py @@ -1,5 +1,7 @@ -import py, weakref +import weakref + from rpython.rlib import rgc +from rpython.rlib.objectmodel import assert_ from rpython.rtyper.lltypesystem import lltype, llmemory from rpython.rtyper.test.tool import BaseRtypingTest @@ -68,9 +70,9 @@ r = w2 return r() is not None res = self.interpret(f, [1]) - assert res == False + assert res is False res = self.interpret(f, [0]) - assert res == True + assert res is True def test_multiple_prebuilt_dead_weakrefs(self): class A: @@ -95,22 +97,22 @@ r = w1 else: r = w3 - assert r() is None + assert_(r() is None) else: if n < -5: r = w2 else: r = w4 - assert r() is not None + assert_(r() is not None) return r() is not None res = self.interpret(f, [1]) - assert res == False + assert res is False res = self.interpret(f, [0]) - assert res == True + assert res is True res = self.interpret(f, [100]) - assert res == False + assert res is False res = self.interpret(f, [-100]) - assert res == True + assert res is True def test_pbc_null_weakref(self): class A: @@ -124,12 +126,12 @@ assert self.interpret(fn, [1]) is True def test_ll_weakref(self): - S = lltype.GcStruct('S', ('x',lltype.Signed)) + S = lltype.GcStruct('S', ('x', lltype.Signed)) def g(): s = lltype.malloc(S) w = llmemory.weakref_create(s) - assert llmemory.weakref_deref(lltype.Ptr(S), w) == s - assert llmemory.weakref_deref(lltype.Ptr(S), w) == s + assert_(llmemory.weakref_deref(lltype.Ptr(S), w) == s) + assert_(llmemory.weakref_deref(lltype.Ptr(S), w) == s) return w # 's' is forgotten here def f(): w = g() @@ -152,7 +154,7 @@ def fn(i): w = g() rgc.collect() - assert w() is not None + assert_(w() is not None) return mylist[i] is None assert self.interpret(fn, [0], rweakref=False) is False From pypy.commits at gmail.com Sun Nov 5 10:09:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 07:09:43 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: Fix asserts in test_newgc.py Message-ID: <59ff29b7.028b1c0a.476b5.69eb@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92948:fe04fd3b8632 Date: 2017-11-05 15:09 +0000 http://bitbucket.org/pypy/pypy/changeset/fe04fd3b8632/ Log: Fix asserts in test_newgc.py diff --git a/rpython/translator/c/test/test_newgc.py b/rpython/translator/c/test/test_newgc.py --- a/rpython/translator/c/test/test_newgc.py +++ b/rpython/translator/c/test/test_newgc.py @@ -9,7 +9,8 @@ from rpython.conftest import option from rpython.rlib import rgc -from rpython.rlib.objectmodel import keepalive_until_here, compute_hash, compute_identity_hash, r_dict +from rpython.rlib.objectmodel import ( + assert_, keepalive_until_here, compute_hash, compute_identity_hash, r_dict) from rpython.rlib.rstring import StringBuilder from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.rtyper.lltypesystem.lloperation import llop @@ 
-107,7 +108,7 @@ funcstr = funcsstr[num] if funcstr: return funcstr(arg) - assert 0, 'unreachable' + assert_(0, 'unreachable') cls.funcsstr = funcsstr cls.c_allfuncs = staticmethod(cls._makefunc_str_int(allfuncs)) cls.allfuncs = staticmethod(allfuncs) @@ -516,7 +517,7 @@ if i & 1 == 0: a = A() a.index = i - assert a is not None + assert_(a is not None) weakrefs.append(weakref.ref(a)) if i % 7 == 6: keepalive.append(a) @@ -525,9 +526,9 @@ for i in range(n): a = weakrefs[i]() if i % 7 == 6: - assert a is not None + assert_(a is not None) if a is not None: - assert a.index == i & ~1 + assert_(a.index == i & ~1) else: count_free += 1 return count_free @@ -585,10 +586,10 @@ for n in range(len(dlist)): d = dlist[n] keys = keyslist[n] - assert len(d) == len(keys) + assert_(len(d) == len(keys)) i = 0 while i < len(keys): - assert d[keys[i]] == i + assert_(d[keys[i]] == i) i += 1 return 42 return fn @@ -701,13 +702,13 @@ def does_stuff(): fd = os.open(filename, os.O_WRONLY | os.O_CREAT, 0777) count = os.write(fd, "hello world\n") - assert count == len("hello world\n") + assert_(count == len("hello world\n")) os.close(fd) fd = os.open(filename, os.O_RDONLY, 0777) result = os.lseek(fd, 1, 0) - assert result == 1 + assert_(result == 1) data = os.read(fd, 500) - assert data == "ello world\n" + assert_(data == "ello world\n") os.close(fd) return 0 @@ -870,7 +871,7 @@ # ^^^ likely to trigger a collection xr = xr.prev i += 1 - assert xr is None + assert_(xr is None) def check(xr, n, step): "Check that the identity hashes are still correct." @@ -882,7 +883,7 @@ raise ValueError xr = xr.prev i += 1 - assert xr is None + assert_(xr is None) def h(n): x3 = g(3) @@ -947,7 +948,7 @@ for i in range(20): x.append((1, lltype.malloc(S))) for i in range(50): - assert l2[i] == (40 + i) * 3 + assert_(l2[i] == (40 + i) * 3) return 0 return fn @@ -965,7 +966,7 @@ rgc.ll_arraycopy(l, l2, 40, 0, 50) rgc.collect() for i in range(50): - assert l2[i] == l[40 + i] + assert_(l2[i] == l[40 + i]) return 0 return fn @@ -985,7 +986,7 @@ found = True if x == lltype.cast_opaque_ptr(llmemory.GCREF, s.u): os.write(2, "s.u should not be found!\n") - assert False + assert_(False) return found == 1 def fn(): @@ -994,7 +995,7 @@ found = g(s) if not found: os.write(2, "not found!\n") - assert False + assert_(False) s.u.x = 42 return 0 @@ -1013,8 +1014,8 @@ gcref1 = lltype.cast_opaque_ptr(llmemory.GCREF, s) gcref2 = lltype.cast_opaque_ptr(llmemory.GCREF, s.u) lst = rgc.get_rpy_referents(gcref1) - assert gcref2 in lst - assert gcref1 not in lst + assert_(gcref2 in lst) + assert_(gcref1 not in lst) s.u.x = 42 return 0 @@ -1030,7 +1031,7 @@ def check(gcref, expected): result = rgc._is_rpy_instance(gcref) - assert result == expected + assert_(result == expected) def fn(): s = lltype.malloc(S) @@ -1060,21 +1061,21 @@ def fn(): foo = Foo() gcref1 = rgc.cast_instance_to_gcref(foo) - assert rgc.try_cast_gcref_to_instance(Foo, gcref1) is foo - assert rgc.try_cast_gcref_to_instance(FooBar, gcref1) is None - assert rgc.try_cast_gcref_to_instance(Biz, gcref1) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, gcref1) is foo) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref1) is None) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref1) is None) foobar = FooBar() gcref2 = rgc.cast_instance_to_gcref(foobar) - assert rgc.try_cast_gcref_to_instance(Foo, gcref2) is foobar - assert rgc.try_cast_gcref_to_instance(FooBar, gcref2) is foobar - assert rgc.try_cast_gcref_to_instance(Biz, gcref2) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, 
gcref2) is foobar) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref2) is foobar) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref2) is None) s = lltype.malloc(S) gcref3 = lltype.cast_opaque_ptr(llmemory.GCREF, s) - assert rgc.try_cast_gcref_to_instance(Foo, gcref3) is None - assert rgc.try_cast_gcref_to_instance(FooBar, gcref3) is None - assert rgc.try_cast_gcref_to_instance(Biz, gcref3) is None + assert_(rgc.try_cast_gcref_to_instance(Foo, gcref3) is None) + assert_(rgc.try_cast_gcref_to_instance(FooBar, gcref3) is None) + assert_(rgc.try_cast_gcref_to_instance(Biz, gcref3) is None) return 0 @@ -1101,13 +1102,13 @@ a = lltype.malloc(A, 1000) gcref1 = lltype.cast_opaque_ptr(llmemory.GCREF, s) int1 = rgc.get_rpy_memory_usage(gcref1) - assert 8 <= int1 <= 32 + assert_(8 <= int1 <= 32) gcref2 = lltype.cast_opaque_ptr(llmemory.GCREF, s.u) int2 = rgc.get_rpy_memory_usage(gcref2) - assert 4 * 9 <= int2 <= 8 * 12 + assert_(4 * 9 <= int2 <= 8 * 12) gcref3 = lltype.cast_opaque_ptr(llmemory.GCREF, a) int3 = rgc.get_rpy_memory_usage(gcref3) - assert 4 * 1001 <= int3 <= 8 * 1010 + assert_(4 * 1001 <= int3 <= 8 * 1010) return 0 return fn @@ -1133,10 +1134,10 @@ int3 = rgc.get_rpy_type_index(gcref3) gcref4 = lltype.cast_opaque_ptr(llmemory.GCREF, s2) int4 = rgc.get_rpy_type_index(gcref4) - assert int1 != int2 - assert int1 != int3 - assert int2 != int3 - assert int1 == int4 + assert_(int1 != int2) + assert_(int1 != int3) + assert_(int2 != int3) + assert_(int1 == int4) return 0 return fn @@ -1216,8 +1217,8 @@ os.close(fd) # a = rgc.get_typeids_list() - assert len(a) > 1 - assert 0 < rffi.cast(lltype.Signed, a[1]) < 10000 + assert_(len(a) > 1) + assert_(0 < rffi.cast(lltype.Signed, a[1]) < 10000) return 0 return fn @@ -1240,20 +1241,20 @@ a2 = A() if not rgc.has_gcflag_extra(): return 0 # cannot test it then - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == False) rgc.toggle_gcflag_extra(a1) - assert rgc.get_gcflag_extra(a1) == True - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == True) + assert_(rgc.get_gcflag_extra(a2) == False) rgc.toggle_gcflag_extra(a2) - assert rgc.get_gcflag_extra(a1) == True - assert rgc.get_gcflag_extra(a2) == True + assert_(rgc.get_gcflag_extra(a1) == True) + assert_(rgc.get_gcflag_extra(a2) == True) rgc.toggle_gcflag_extra(a1) - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == True + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == True) rgc.toggle_gcflag_extra(a2) - assert rgc.get_gcflag_extra(a1) == False - assert rgc.get_gcflag_extra(a2) == False + assert_(rgc.get_gcflag_extra(a1) == False) + assert_(rgc.get_gcflag_extra(a2) == False) return 0 return fn @@ -1271,11 +1272,11 @@ def fn(): s = lltype.malloc(S, zero=True) - assert s.x == 0 + assert_(s.x == 0) s2 = lltype.malloc(S2, zero=True) - assert s2.parent.x == 0 + assert_(s2.parent.x == 0) a = lltype.malloc(A, 3, zero=True) - assert a[2] == 0 + assert_(a[2] == 0) # XXX not supported right now in gctransform/framework.py: #b = lltype.malloc(B, 3, zero=True) #assert len(b.y) == 3 @@ -1307,7 +1308,7 @@ def test_long_chain_of_instances(self): res = self.run("long_chain_of_instances") assert res == 1500 - + class TestSemiSpaceGC(UsingFrameworkTest, snippet.SemiSpaceGCTestDefines): gcpolicy = "semispace" @@ -1475,7 +1476,7 @@ def define_nursery_hash_base(cls): from rpython.rlib.debug import debug_print - + class 
A: pass def fn(): @@ -1492,7 +1493,7 @@ debug_print("objects", len(objects)) for i in range(len(objects)): debug_print(i) - assert compute_identity_hash(objects[i]) == hashes[i] + assert_(compute_identity_hash(objects[i]) == hashes[i]) debug_print("storing in dict") unique[hashes[i]] = None debug_print("done") @@ -1528,10 +1529,10 @@ def check(lst): hashes = [] for i, (s, a) in enumerate(lst): - assert a.x == i + assert_(a.x == i) rgc.ll_write_final_null_char(s) for i, (s, a) in enumerate(lst): - assert a.x == i # check it was not overwritten + assert_(a.x == i) # check it was not overwritten def fn(): check(prebuilt) lst1 = [] @@ -1733,7 +1734,7 @@ assert popen.wait() in (-6, 134) # aborted # note: it seems that on some systems we get 134 and on # others we get -6. Bash is supposed to translate the - # SIGABRT (signal 6) from the subprocess into the exit + # SIGABRT (signal 6) from the subprocess into the exit # code 128+6, but I guess it may not always do so. assert 'out of memory:' in child_stderr return '42' From pypy.commits at gmail.com Sun Nov 5 10:28:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 07:28:07 -0800 (PST) Subject: [pypy-commit] pypy default: kill test that has been disabled for 6 years Message-ID: <59ff2e07.90051c0a.ada36.94b2@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92949:50ba491d0e92 Date: 2017-11-05 15:27 +0000 http://bitbucket.org/pypy/pypy/changeset/50ba491d0e92/ Log: kill test that has been disabled for 6 years diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -82,46 +82,5 @@ assert res == 1 self.check_resops(call_r=1) # for the case B(), but not for the case A() - def test_keepalive(self): - py.test.skip("XXX fails") # hum, I think the test itself is broken - # - mydriver = JitDriver(reds = ['n', 'states'], greens = []) - class State: - num = 1 - class X: - def __init__(self, state): - self.state = state - def __del__(self): - self.state.num += 1 - @dont_look_inside - def do_stuff(): - pass - def f(n): - states = [] - while n > 0: - mydriver.jit_merge_point(n=n, states=states) - state = State() - states.append(state) - x = X(state) - do_stuff() - state.num *= 1000 - do_stuff() - keepalive_until_here(x) - n -= 1 - return states - def main(n): - states = f(n) - rgc.collect() - rgc.collect() - err = 1001 - for state in states: - if state.num != 1001: - err = state.num - print 'ERROR:', err - return err - assert main(20) == 1001 - res = self.meta_interp(main, [20]) - assert res == 1001 - class TestLLtype(DelTests, LLJitMixin): pass From pypy.commits at gmail.com Sun Nov 5 14:28:05 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 05 Nov 2017 11:28:05 -0800 (PST) Subject: [pypy-commit] pypy default: add method used in matplotlib Message-ID: <59ff6645.07d81c0a.c75d9.0cb1@mx.google.com> Author: Matti Picus Branch: Changeset: r92950:bab05da3f317 Date: 2017-11-05 21:17 +0200 http://bitbucket.org/pypy/pypy/changeset/bab05da3f317/ Log: add method used in matplotlib diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. 
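The interpaddr() method added above exposes the address of the underlying Tcl interpreter as a plain integer; that is what a consumer such as matplotlib's TkAgg backend reads through the Tkinter wrapper and hands on to its C extension. A minimal Python 2 usage sketch, assuming only the standard Tkinter wrapper around this _tkinter app object (illustrative only, nothing below is taken from the changeset except the method name):

    import Tkinter                       # Python 2 wrapper over _tkinter

    root = Tkinter.Tk()                  # needs a Tk installation and a display
    tkapp = root.tk                      # the _tkinter application object
    addr = tkapp.interpaddr()            # integer address of the Tcl interpreter
    assert isinstance(addr, (int, long))

On this implementation the returned value is simply int(tkffi.cast('size_t', self.interp)), as the diff shows, so it can be passed wherever the address from CPython's _tkinter would be used.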
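The assert-rewrite changesets in this digest replace bare assert statements inside the interpreted test functions with calls to assert_ imported from rpython.rlib.objectmodel. The helper's own definition is not shown in any of these diffs; judging only from the call sites that do appear (a condition, optionally followed by a message, e.g. assert_(x == -42) and assert_(0, "should have raised KeyError")), it presumably behaves like the following sketch, which is a hypothetical stand-in rather than the branch's actual code:

    def assert_(condition, msg="assertion failed"):
        # Hypothetical stand-in for rpython.rlib.objectmodel.assert_ as used
        # in the surrounding diffs: raise the same way a failing assert would.
        if not condition:
            raise AssertionError(msg)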
From pypy.commits at gmail.com Sun Nov 5 15:50:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 05 Nov 2017 12:50:00 -0800 (PST) Subject: [pypy-commit] pypy assert-rewrite: more assert fixes Message-ID: <59ff7978.03a7df0a.ebb7f.4749@mx.google.com> Author: Ronan Lamy Branch: assert-rewrite Changeset: r92951:7318052f560c Date: 2017-11-05 20:49 +0000 http://bitbucket.org/pypy/pypy/changeset/7318052f560c/ Log: more assert fixes diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py --- a/rpython/jit/metainterp/test/test_ajit.py +++ b/rpython/jit/metainterp/test/test_ajit.py @@ -3,6 +3,7 @@ import py import weakref +from rpython.rlib.objectmodel import assert_ from rpython.rlib import rgc from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.jit.metainterp import history @@ -121,7 +122,7 @@ res += ovfcheck(x * x) y -= 1 except OverflowError: - assert 0 + assert_(0) return res res = self.meta_interp(f, [6, 7]) assert res == 1323 @@ -157,7 +158,7 @@ try: res += ovfcheck(x * x) + b except OverflowError: - assert 0 + assert_(0) y -= 1 return res res = self.meta_interp(f, [6, 7]) @@ -793,7 +794,7 @@ return llop.int_between(lltype.Bool, arg1, arg2, arg3) """ % locals()).compile() in loc res = self.interp_operations(loc['f'], [5, 6, 7]) - assert res == expect_result + assert_(res == expect_result) self.check_operations_history(expect_operations) # check('n', 'm', 'p', True, int_sub=2, uint_lt=1) @@ -997,7 +998,7 @@ while i < 10: myjitdriver.can_enter_jit(i=i, t=t) myjitdriver.jit_merge_point(i=i, t=t) - assert i > 0 + assert_(i > 0) t += int_c_div(100, i) - int_c_mod(100, i) i += 1 return t @@ -1220,7 +1221,7 @@ # to the backend at all: ZeroDivisionError # def f(n): - assert n >= 0 + assert_(n >= 0) try: return ovfcheck(5 % n) except ZeroDivisionError: @@ -1231,7 +1232,7 @@ assert res == -666 # def f(n): - assert n >= 0 + assert_(n >= 0) try: return ovfcheck(6 // n) except ZeroDivisionError: @@ -1350,7 +1351,7 @@ else: obj = A() obj.a = 17 - assert isinstance(obj, B) + assert_(isinstance(obj, B)) return obj.a res = self.interp_operations(fn, [1]) assert res == 1 @@ -1922,8 +1923,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [6, 7]) assert res == 6*8 + 6**8 @@ -1966,8 +1967,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [6, 20]) assert res == g(6, 20) @@ -2001,16 +2002,16 @@ def g(x, y): a1 = f(A(x), y, A(x)) a2 = f(A(x), y, A(x)) - assert a1.val == a2.val + assert_(a1.val == a2.val) b1 = f(B(x), y, B(x)) b2 = f(B(x), y, B(x)) - assert b1.val == b2.val + assert_(b1.val == b2.val) c1 = f(B(x), y, A(x)) c2 = f(B(x), y, A(x)) - assert c1.val == c2.val + assert_(c1.val == c2.val) d1 = f(A(x), y, B(x)) d2 = f(A(x), y, B(x)) - assert d1.val == d2.val + assert_(d1.val == d2.val) return a1.val + b1.val + c1.val + d1.val res = self.meta_interp(g, [3, 14]) assert res == g(3, 14) @@ -2041,7 +2042,7 @@ def g(x, y): c1 = f(A(x), y, B(x)) c2 = f(A(x), y, B(x)) - assert c1.val == c2.val + assert_(c1.val == c2.val) return c1.val res = self.meta_interp(g, [3, 16]) assert res == g(3, 16) @@ -2068,7 +2069,7 @@ def g(x, y): a1 = f(A(x), y, A(x)) a2 = f(A(x), y, A(x)) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val 
res = self.meta_interp(g, [3, 14]) assert res == g(3, 14) @@ -2093,7 +2094,7 @@ def g(x, y): a1 = f(A(x), y) a2 = f(A(x), y) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val res = self.meta_interp(g, [6, 14]) assert res == g(6, 14) @@ -2120,7 +2121,7 @@ def g(x, y): a1 = f(A(x), y) a2 = f(A(x), y) - assert a1.val == a2.val + assert_(a1.val == a2.val) return a1.val res = self.meta_interp(g, [6, 14]) assert res == g(6, 14) @@ -2156,8 +2157,8 @@ a2 = f(A(x), y) b1 = f(B(x), y) b2 = f(B(x), y) - assert a1.val == a2.val - assert b1.val == b2.val + assert_(a1.val == a2.val) + assert_(b1.val == b2.val) return a1.val + b1.val res = self.meta_interp(g, [3, 23]) assert res == 7068153 @@ -2730,7 +2731,7 @@ try: sa += ovfcheck(i + i) except OverflowError: - assert 0 + assert_(0) node1 = A(i) i += 1 assert self.meta_interp(f, [20, 7]) == f(20, 7) @@ -2762,7 +2763,7 @@ sa += 1 else: sa += 2 - assert -100 < i < 100 + assert_(-100 < i < 100) i += 1 return sa assert self.meta_interp(f, [20]) == f(20) @@ -2783,7 +2784,7 @@ sa += 1 else: sa += 2 - assert -100 <= node.val <= 100 + assert_(-100 <= node.val <= 100) i += 1 return sa assert self.meta_interp(f, [20]) == f(20) @@ -3863,13 +3864,13 @@ def f(x): a = make(x) if x > 0: - assert isinstance(a, A) + assert_(isinstance(a, A)) z = a.f() elif x < 0: - assert isinstance(a, B) + assert_(isinstance(a, B)) z = a.f() else: - assert isinstance(a, C) + assert_(isinstance(a, C)) z = a.f() return z + a.g() res1 = f(6) @@ -4285,7 +4286,7 @@ return x > x or x > x if cmp == 'ge': return x >= x and x >= x - assert 0 + assert_(0) return f def make_str(cmp): @@ -4295,7 +4296,7 @@ return x is x or x is x if cmp == 'ne': return x is not x and x is not x - assert 0 + assert_(0) return f def make_object(cmp): @@ -4307,7 +4308,7 @@ return x is x if cmp == 'ne': return x is not x - assert 0 + assert_(0) return f for cmp in 'eq ne lt le gt ge'.split(): diff --git a/rpython/jit/metainterp/test/test_bytearray.py b/rpython/jit/metainterp/test/test_bytearray.py --- a/rpython/jit/metainterp/test/test_bytearray.py +++ b/rpython/jit/metainterp/test/test_bytearray.py @@ -1,13 +1,14 @@ import py +from rpython.rlib.objectmodel import assert_ from rpython.jit.metainterp.test.support import LLJitMixin -from rpython.rlib.jit import JitDriver, dont_look_inside +from rpython.rlib.jit import dont_look_inside class TestByteArray(LLJitMixin): def test_getitem(self): x = bytearray("foobar") def fn(n): - assert n >= 0 + assert_(n >= 0) return x[n] res = self.interp_operations(fn, [3]) assert res == ord('b') @@ -31,7 +32,7 @@ def make_me(): return bytearray("foobar") def fn(n): - assert n >= 0 + assert_(n >= 0) x = make_me() x[n] = 3 return x[3] + 1000 * x[4] diff --git a/rpython/jit/metainterp/test/test_call.py b/rpython/jit/metainterp/test/test_call.py --- a/rpython/jit/metainterp/test/test_call.py +++ b/rpython/jit/metainterp/test/test_call.py @@ -1,4 +1,4 @@ - +from rpython.rlib.objectmodel import assert_ from rpython.jit.metainterp.test.support import LLJitMixin, noConst from rpython.rlib import jit @@ -146,8 +146,8 @@ while n > 0: myjitdriver.can_enter_jit(n=n, p=p, m=m) myjitdriver.jit_merge_point(n=n, p=p, m=m) - assert p > -1 - assert p < 1 + assert_(p > -1) + assert_(p < 1) n -= jit.conditional_call_elidable(p, externfn, n) return n res = self.meta_interp(f, [21, 5, 0]) @@ -165,8 +165,8 @@ while n > 0: myjitdriver.can_enter_jit(n=n, p=p, m=m) myjitdriver.jit_merge_point(n=n, p=p, m=m) - assert p > -1 - assert p < 1 + assert_(p > -1) + assert_(p < 1) n0 = n n -= 
jit.conditional_call_elidable(p, externfn, n0) n -= jit.conditional_call_elidable(p, externfn, n0) diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -1,6 +1,6 @@ import py from rpython.rlib.jit import JitDriver, dont_look_inside -from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.objectmodel import keepalive_until_here, assert_ from rpython.rlib import rgc from rpython.jit.metainterp.test.support import LLJitMixin @@ -30,7 +30,7 @@ 'jump': 1}) def test_class_of_allocated(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) class Foo: def __del__(self): pass @@ -49,16 +49,15 @@ myjitdriver.jit_merge_point(x=x, n=n) x = X() y = Y() - assert x.f() == 456 - assert y.f() == 123 + assert_(x.f() == 456) + assert_(y.f() == 123) n -= 1 return 42 res = self.meta_interp(f, [20]) assert res == 42 def test_instantiate_with_or_without_del(self): - import gc - mydriver = JitDriver(reds = ['n', 'x'], greens = []) + mydriver = JitDriver(reds=['n', 'x'], greens=[]) class Base: pass class A(Base): foo = 72 class B(Base): diff --git a/rpython/jit/metainterp/test/test_dict.py b/rpython/jit/metainterp/test/test_dict.py --- a/rpython/jit/metainterp/test/test_dict.py +++ b/rpython/jit/metainterp/test/test_dict.py @@ -1,8 +1,8 @@ +from collections import OrderedDict import py +from rpython.rlib.objectmodel import assert_, r_dict, compute_hash from rpython.jit.metainterp.test.support import LLJitMixin from rpython.rlib.jit import JitDriver -from rpython.rlib import objectmodel -from collections import OrderedDict class DictTests: @staticmethod @@ -104,7 +104,7 @@ return (x & 1) == (y & 1) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n while total: myjitdriver.jit_merge_point(total=total, dct=dct) @@ -145,7 +145,7 @@ return (x & 1) == (y & 1) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n while total: myjitdriver.jit_merge_point(total=total, dct=dct) @@ -169,13 +169,13 @@ def eq_func(a, b): return a.value == b.value def hash_func(x): - return objectmodel.compute_hash(x.value) + return compute_hash(x.value) def f(n): d = None while n > 0: myjitdriver.jit_merge_point(n=n, d=d) - d = objectmodel.r_dict(eq_func, hash_func) + d = r_dict(eq_func, hash_func) y = Wrapper(str(n)) d[y] = n - 1 n = d[y] @@ -331,7 +331,7 @@ return (x % 2) == (y % 2) def f(n): - dct = objectmodel.r_dict(eq, key) + dct = r_dict(eq, key) total = n x = 44444 y = 55555 @@ -398,7 +398,7 @@ d[2] = 6 d[1] = 4 lst = d.items() - assert len(lst) == 4 + assert_(len(lst) == 4) return ( lst[0][0] + 10*lst[0][1] + 100*lst[1][0] + 1000*lst[1][1] + 10000*lst[3][0] + 100000*lst[2][1] + diff --git a/rpython/jit/metainterp/test/test_exception.py b/rpython/jit/metainterp/test/test_exception.py --- a/rpython/jit/metainterp/test/test_exception.py +++ b/rpython/jit/metainterp/test/test_exception.py @@ -2,7 +2,7 @@ from rpython.jit.metainterp.test.support import LLJitMixin from rpython.rlib.jit import JitDriver, dont_look_inside from rpython.rlib.rarithmetic import ovfcheck, LONG_BIT, intmask -from rpython.rlib.objectmodel import keepalive_until_here +from rpython.rlib.objectmodel import keepalive_until_here, assert_ from rpython.jit.codewriter.policy import StopAtXPolicy from rpython.rtyper.lltypesystem import lltype, rffi @@ -633,11 +633,11 @@ try: rescall(i) except 
KeyError: - assert i < 10 + assert_(i < 10) except ValueError: - assert i >= 20 + assert_(i >= 20) else: - assert 10 <= i < 20 + assert_(10 <= i < 20) i += 1 return i res = self.meta_interp(f, [0], inline=True) diff --git a/rpython/jit/metainterp/test/test_fficall.py b/rpython/jit/metainterp/test/test_fficall.py --- a/rpython/jit/metainterp/test/test_fficall.py +++ b/rpython/jit/metainterp/test/test_fficall.py @@ -6,6 +6,7 @@ from rpython.rtyper.annlowlevel import llhelper from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.longlong import is_longlong, is_64_bit +from rpython.rlib.objectmodel import assert_ from rpython.rlib import jit from rpython.rlib import jit_libffi from rpython.rlib.jit_libffi import (types, CIF_DESCRIPTION, FFI_TYPE_PP, @@ -31,11 +32,11 @@ Context manager to monkey patch jit_libffi with our custom "libffi-like" function """ - + def __init__(self, fake_call_impl_any): self.fake_call_impl_any = fake_call_impl_any self.monkey = monkeypatch() - + def __enter__(self, *args): self.monkey.setattr(jit_libffi, 'jit_ffi_call_impl_any', self.fake_call_impl_any) @@ -61,7 +62,7 @@ if (lltype.typeOf(exp_a) == rffi.ULONG and lltype.typeOf(a) == lltype.Signed): a = rffi.cast(rffi.ULONG, a) - assert a == exp_a + assert_(a == exp_a) return rvalue FUNC = lltype.FuncType([lltype.typeOf(avalue) for avalue in avalues], lltype.typeOf(rvalue)) @@ -88,7 +89,7 @@ lltype.typeOf(avalue) is rffi.UCHAR): got = intmask(got) avalue = intmask(avalue) - assert got == avalue + assert_(got == avalue) ofs += 16 write_to_ofs = 0 if rvalue is not None: @@ -312,7 +313,7 @@ # call_release_gil was simply lost and when guard_not_forced # failed, and the value of "res" was unpredictable. # See commit b84ff38f34bd and subsequents. - assert res == n*2 + assert_(res == n*2) jit.virtual_ref_finish(vref, xy) exctx.topframeref = jit.vref_None n += 1 @@ -322,7 +323,7 @@ assert f() == 100 res = self.meta_interp(f, []) assert res == 100 - + class TestFfiCall(FfiCallTests, LLJitMixin): def test_jit_ffi_vref(self): @@ -349,7 +350,7 @@ # jit_ffi_prep_cif(cd) # - assert rffi.sizeof(rffi.DOUBLE) == 8 + assert_(rffi.sizeof(rffi.DOUBLE) == 8) exb = lltype.malloc(rffi.DOUBLEP.TO, 8, flavor='raw') exb[2] = 1.23 jit_ffi_call(cd, math_sin, rffi.cast(rffi.CCHARP, exb)) diff --git a/rpython/jit/metainterp/test/test_jitiface.py b/rpython/jit/metainterp/test/test_jitiface.py --- a/rpython/jit/metainterp/test/test_jitiface.py +++ b/rpython/jit/metainterp/test/test_jitiface.py @@ -1,19 +1,18 @@ -import py -from rpython.rlib.jit import JitDriver, JitHookInterface, Counters, dont_look_inside +from rpython.rlib.objectmodel import assert_ +from rpython.rlib.jit import ( + JitDriver, JitHookInterface, Counters, dont_look_inside) from rpython.rlib import jit_hooks from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.policy import JitPolicy -from rpython.jit.metainterp.resoperation import rop -from rpython.rtyper.annlowlevel import hlstr, cast_instance_to_gcref +from rpython.rtyper.annlowlevel import cast_instance_to_gcref from rpython.jit.metainterp.jitprof import Profiler, EmptyProfiler -from rpython.jit.codewriter.policy import JitPolicy class JitHookInterfaceTests(object): # !!!note!!! - don't subclass this from the backend. 
Subclass the LL # class later instead - + def test_abort_quasi_immut(self): reasons = [] @@ -71,7 +70,7 @@ iface = MyJitIface() - driver = JitDriver(greens = ['n', 'm'], reds = ['i']) + driver = JitDriver(greens=['n', 'm'], reds=['i']) def loop(n, m): i = 0 @@ -94,7 +93,7 @@ def test_on_compile_bridge(self): called = [] - + class MyJitIface(JitHookInterface): def after_compile(self, di): called.append("compile") @@ -104,8 +103,8 @@ def before_compile_bridge(self, di): called.append("before_compile_bridge") - - driver = JitDriver(greens = ['n', 'm'], reds = ['i']) + + driver = JitDriver(greens=['n', 'm'], reds=['i']) def loop(n, m): i = 0 @@ -120,7 +119,7 @@ assert called == ["compile", "before_compile_bridge", "compile_bridge"] def test_get_stats(self): - driver = JitDriver(greens = [], reds = ['i', 's']) + driver = JitDriver(greens=[], reds=['i', 's']) def loop(i): s = 0 @@ -134,31 +133,33 @@ def main(): loop(30) - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_LOOPS) == 1 - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_BRIDGES) == 1 - assert jit_hooks.stats_get_counter_value(None, - Counters.TRACING) == 2 - assert jit_hooks.stats_get_times_value(None, Counters.TRACING) >= 0 + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_LOOPS) == 1) + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_BRIDGES) == 1) + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TRACING) == 2) + assert_(jit_hooks.stats_get_times_value( + None, Counters.TRACING) >= 0) self.meta_interp(main, [], ProfilerClass=Profiler) def test_get_stats_empty(self): - driver = JitDriver(greens = [], reds = ['i']) + driver = JitDriver(greens=[], reds=['i']) def loop(i): while i > 0: driver.jit_merge_point(i=i) i -= 1 def main(): loop(30) - assert jit_hooks.stats_get_counter_value(None, - Counters.TOTAL_COMPILED_LOOPS) == 0 - assert jit_hooks.stats_get_times_value(None, Counters.TRACING) == 0 + assert_(jit_hooks.stats_get_counter_value( + None, Counters.TOTAL_COMPILED_LOOPS) == 0) + assert_(jit_hooks.stats_get_times_value( + None, Counters.TRACING) == 0) self.meta_interp(main, [], ProfilerClass=EmptyProfiler) def test_get_jitcell_at_key(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name='jit') + driver = JitDriver(greens=['s'], reds=['i'], name='jit') def loop(i, s): while i > s: @@ -167,17 +168,17 @@ def main(s): loop(30, s) - assert jit_hooks.get_jitcell_at_key("jit", s) - assert not jit_hooks.get_jitcell_at_key("jit", s + 1) + assert_(jit_hooks.get_jitcell_at_key("jit", s)) + assert_(not jit_hooks.get_jitcell_at_key("jit", s + 1)) jit_hooks.trace_next_iteration("jit", s + 1) loop(s + 3, s + 1) - assert jit_hooks.get_jitcell_at_key("jit", s + 1) + assert_(jit_hooks.get_jitcell_at_key("jit", s + 1)) self.meta_interp(main, [5]) self.check_jitcell_token_count(2) def test_get_jitcell_at_key_ptr(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name='jit') + driver = JitDriver(greens=['s'], reds=['i'], name='jit') class Green(object): pass @@ -193,17 +194,17 @@ g1_ptr = cast_instance_to_gcref(g1) g2_ptr = cast_instance_to_gcref(g2) loop(10, g1) - assert jit_hooks.get_jitcell_at_key("jit", g1_ptr) - assert not jit_hooks.get_jitcell_at_key("jit", g2_ptr) + assert_(jit_hooks.get_jitcell_at_key("jit", g1_ptr)) + assert_(not jit_hooks.get_jitcell_at_key("jit", g2_ptr)) jit_hooks.trace_next_iteration("jit", g2_ptr) loop(2, g2) - assert jit_hooks.get_jitcell_at_key("jit", g2_ptr) + 
assert_(jit_hooks.get_jitcell_at_key("jit", g2_ptr)) self.meta_interp(main, [5]) self.check_jitcell_token_count(2) def test_dont_trace_here(self): - driver = JitDriver(greens = ['s'], reds = ['i', 'k'], name='jit') + driver = JitDriver(greens=['s'], reds=['i', 'k'], name='jit') def loop(i, s): k = 4 @@ -228,10 +229,10 @@ self.check_resops(call_assembler_n=8) def test_trace_next_iteration_hash(self): - driver = JitDriver(greens = ['s'], reds = ['i'], name="name") + driver = JitDriver(greens=['s'], reds=['i'], name="name") class Hashes(object): check = False - + def __init__(self): self.l = [] self.t = [] @@ -281,9 +282,9 @@ class LLJitHookInterfaceTests(JitHookInterfaceTests): # use this for any backend, instead of the super class - + def test_ll_get_stats(self): - driver = JitDriver(greens = [], reds = ['i', 's']) + driver = JitDriver(greens=[], reds=['i', 's']) def loop(i): s = 0 @@ -292,7 +293,7 @@ if i % 2: s += 1 i -= 1 - s+= 2 + s += 2 return s def main(b): @@ -300,27 +301,27 @@ loop(30) l = jit_hooks.stats_get_loop_run_times(None) if b: - assert len(l) == 4 + assert_(len(l) == 4) # completely specific test that would fail each time # we change anything major. for now it's 4 # (loop, bridge, 2 entry points) - assert l[0].type == 'e' - assert l[0].number == 0 - assert l[0].counter == 4 - assert l[1].type == 'l' - assert l[1].counter == 4 - assert l[2].type == 'l' - assert l[2].counter == 23 - assert l[3].type == 'b' - assert l[3].number == 4 - assert l[3].counter == 11 + assert_(l[0].type == 'e') + assert_(l[0].number == 0) + assert_(l[0].counter == 4) + assert_(l[1].type == 'l') + assert_(l[1].counter == 4) + assert_(l[2].type == 'l') + assert_(l[2].counter == 23) + assert_(l[3].type == 'b') + assert_(l[3].number == 4) + assert_(l[3].counter == 11) else: - assert len(l) == 0 + assert_(len(l) == 0) self.meta_interp(main, [True], ProfilerClass=Profiler) # this so far does not work because of the way setup_once is done, # but fine, it's only about untranslated version anyway #self.meta_interp(main, [False], ProfilerClass=Profiler) - + class TestJitHookInterface(JitHookInterfaceTests, LLJitMixin): pass diff --git a/rpython/jit/metainterp/test/test_loop.py b/rpython/jit/metainterp/test/test_loop.py --- a/rpython/jit/metainterp/test/test_loop.py +++ b/rpython/jit/metainterp/test/test_loop.py @@ -1,19 +1,20 @@ import py -from rpython.rlib.jit import JitDriver, hint, set_param, dont_look_inside,\ - elidable -from rpython.rlib.objectmodel import compute_hash +from rpython.rlib.jit import ( + JitDriver, set_param, dont_look_inside, elidable) +from rpython.rlib.objectmodel import compute_hash, assert_ +from rpython.rlib.rerased import new_erasing_pair +from rpython.rtyper.lltypesystem import lltype + from rpython.jit.metainterp.warmspot import ll_meta_interp, get_stats from rpython.jit.metainterp.test.support import LLJitMixin from rpython.jit.codewriter.policy import StopAtXPolicy -from rpython.jit.metainterp.resoperation import rop -from rpython.jit.metainterp import history class LoopTest(object): enable_opts = '' automatic_promotion_result = { - 'int_add' : 6, 'int_gt' : 1, 'guard_false' : 1, 'jump' : 1, - 'guard_value' : 3 + 'int_add': 6, 'int_gt': 1, 'guard_false': 1, 'jump': 1, + 'guard_value': 3 } def meta_interp(self, f, args, policy=None, backendopt=False): @@ -26,7 +27,8 @@ return f(*args) def test_simple_loop(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res']) + def f(x, y): res = 0 while y > 0: @@ -40,7 
+42,8 @@ self.check_trace_count(1) def test_loop_with_delayed_setfield(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res', 'a']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res', 'a']) + class A(object): def __init__(self): self.x = 3 @@ -67,7 +70,7 @@ def test_loop_with_two_paths(self): from rpython.rtyper.lltypesystem import lltype from rpython.rtyper.lltypesystem.lloperation import llop - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'res']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'res']) def l(y, x, t): llop.debug_print(lltype.Void, y, x, t) @@ -96,7 +99,7 @@ self.check_trace_count(2) def test_alternating_loops(self): - myjitdriver = JitDriver(greens = [], reds = ['pattern']) + myjitdriver = JitDriver(greens=[], reds=['pattern']) def f(pattern): while pattern > 0: myjitdriver.can_enter_jit(pattern=pattern) @@ -114,7 +117,7 @@ self.check_trace_count(2) def test_interp_simple(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "bedca" def f(x, y): i = 0 @@ -139,7 +142,7 @@ self.check_trace_count(0) def test_green_prevents_loop(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "+--+++++----" def f(x, y): i = 0 @@ -158,7 +161,7 @@ self.check_trace_count(0) def test_interp_single_loop(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "abcd" def f(x, y): i = 0 @@ -201,7 +204,7 @@ assert found == 1 def test_interp_many_paths(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'node']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'node']) NODE = self._get_NODE() bytecode = "xxxxxxxb" def f(node): @@ -240,7 +243,7 @@ oldlimit = sys.getrecursionlimit() try: sys.setrecursionlimit(10000) - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'node']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'node']) NODE = self._get_NODE() bytecode = "xxxxxxxb" @@ -281,7 +284,7 @@ sys.setrecursionlimit(oldlimit) def test_nested_loops(self): - myjitdriver = JitDriver(greens = ['i'], reds = ['x', 'y']) + myjitdriver = JitDriver(greens=['i'], reds=['x', 'y']) bytecode = "abc= 0 + assert_(x >= 0) i = 0 while i < len(bytecode): myjitdriver.jit_merge_point(i=i, x=x) @@ -590,7 +593,7 @@ assert res == expected def test_unused_loop_constant(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'y', 'z']) + myjitdriver = JitDriver(greens=[], reds=['x', 'y', 'z']) def f(x, y, z): while z > 0: myjitdriver.can_enter_jit(x=x, y=y, z=z) @@ -603,7 +606,7 @@ assert res == expected def test_loop_unicode(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) def f(n): x = u'' while n > 13: @@ -617,7 +620,7 @@ assert res == expected def test_loop_string(self): - myjitdriver = JitDriver(greens = [], reds = ['n', 'x']) + myjitdriver = JitDriver(greens=[], reds=['n', 'x']) def f(n): x = '' while n > 13: @@ -632,7 +635,7 @@ assert res == expected def test_adapt_bridge_to_merge_point(self): - myjitdriver = JitDriver(greens = [], reds = ['x', 'z']) + myjitdriver = JitDriver(greens=[], reds=['x', 'z']) class Z(object): def __init__(self, elem): @@ -812,7 +815,7 @@ self.check_trace_count(2) def test_path_with_operations_not_from_start(self): - jitdriver = JitDriver(greens = ['k'], reds = ['n', 'z']) + jitdriver = JitDriver(greens=['k'], reds=['n', 
'z']) def f(n): k = 0 @@ -831,11 +834,11 @@ n -= 1 return 42 - res = self.meta_interp(f, [200]) + self.meta_interp(f, [200]) def test_path_with_operations_not_from_start_2(self): - jitdriver = JitDriver(greens = ['k'], reds = ['n', 'z', 'stuff']) + jitdriver = JitDriver(greens=['k'], reds=['n', 'z', 'stuff']) class Stuff(object): def __init__(self, n): @@ -869,7 +872,8 @@ BASE = lltype.GcStruct('BASE') A = lltype.GcStruct('A', ('parent', BASE), ('val', lltype.Signed)) B = lltype.GcStruct('B', ('parent', BASE), ('charval', lltype.Char)) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p']) + myjitdriver = JitDriver(greens=[], reds=['n', 'm', 'i', 'j', 'sa', 'p']) + def f(n, m, j): i = sa = 0 pa = lltype.malloc(A) @@ -888,22 +892,22 @@ pb = lltype.cast_pointer(lltype.Ptr(B), p) sa += ord(pb.charval) sa += 100 - assert n>0 and m>0 + assert_(n > 0 and m > 0) i += j return sa # This is detected as invalid by the codewriter, for now py.test.raises(NotImplementedError, self.meta_interp, f, [20, 10, 1]) def test_unerased_pointers_in_short_preamble(self): - from rpython.rlib.rerased import new_erasing_pair - from rpython.rtyper.lltypesystem import lltype class A(object): def __init__(self, val): self.val = val erase_A, unerase_A = new_erasing_pair('A') erase_TP, unerase_TP = new_erasing_pair('TP') TP = lltype.GcArray(lltype.Signed) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'j', 'sa', 'p']) + myjitdriver = JitDriver( + greens=[], reds=['n', 'm', 'i', 'j', 'sa', 'p']) + def f(n, m, j): i = sa = 0 p = erase_A(A(7)) @@ -918,14 +922,13 @@ else: sa += unerase_TP(p)[0] sa += A(i).val - assert n>0 and m>0 + assert_(n > 0 and m > 0) i += j return sa res = self.meta_interp(f, [20, 10, 1]) assert res == f(20, 10, 1) def test_boxed_unerased_pointers_in_short_preamble(self): - from rpython.rlib.rerased import new_erasing_pair from rpython.rtyper.lltypesystem import lltype class A(object): def __init__(self, val): @@ -940,7 +943,7 @@ erase_A, unerase_A = new_erasing_pair('A') erase_TP, unerase_TP = new_erasing_pair('TP') TP = lltype.GcArray(lltype.Signed) - myjitdriver = JitDriver(greens = [], reds = ['n', 'm', 'i', 'sa', 'p']) + myjitdriver = JitDriver(greens=[], reds=['n', 'm', 'i', 'sa', 'p']) def f(n, m): i = sa = 0 p = Box(erase_A(A(7))) @@ -1011,7 +1014,6 @@ class C(object): pass - from rpython.rlib.rerased import new_erasing_pair b_erase, b_unerase = new_erasing_pair("B") c_erase, c_unerase = new_erasing_pair("C") @@ -1044,7 +1046,6 @@ def test_unroll_issue_3(self): py.test.skip("decide") - from rpython.rlib.rerased import new_erasing_pair b_erase, b_unerase = new_erasing_pair("B") # list of ints c_erase, c_unerase = new_erasing_pair("C") # list of Nones @@ -1075,7 +1076,7 @@ assert res == 420 def test_not_too_many_bridges(self): - jitdriver = JitDriver(greens = [], reds = 'auto') + jitdriver = JitDriver(greens=[], reds='auto') def f(i): s = 0 @@ -1097,7 +1098,7 @@ def test_sharing_guards(self): py.test.skip("unimplemented") - driver = JitDriver(greens = [], reds = 'auto') + driver = JitDriver(greens=[], reds='auto') def f(i): s = 0 @@ -1145,7 +1146,7 @@ v = reverse(W_Cons(pc + 1, W_Cons(pc + 2, W_Cons(pc + 3, W_Cons(pc + 4, W_Nil()))))) pc = pc + 1 repetitions += 1 - + self.meta_interp(entry_point, []) From pypy.commits at gmail.com Mon Nov 6 05:40:35 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:35 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: update the src/shared files to vmprof==0.4.10 Message-ID: 
<5a003c23.035d1c0a.21855.51b6@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92954:e4158aeecc04 Date: 2017-11-06 11:39 +0100 http://bitbucket.org/pypy/pypy/changeset/e4158aeecc04/ Log: update the src/shared files to vmprof==0.4.10 diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); + return 0; +} int vmp_write_all(const char 
*buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); From pypy.commits at gmail.com Mon Nov 6 05:40:31 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:31 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: check also the subdirectories Message-ID: <5a003c1f.52bf1c0a.ebca9.b65c@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92952:2b6ce63316a3 Date: 2017-11-06 11:35 +0100 http://bitbucket.org/pypy/pypy/changeset/2b6ce63316a3/ Log: check also the subdirectories diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -5,19 +5,25 @@ RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): shared = RVMPROF.join('src', 'shared') - files = shared.listdir('*.[ch]') + files = get_list_of_files(shared) assert files, 'cannot find any C file, probably the directory is wrong?' no_matches = [] print for file in files: - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file.basename) + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) source = urllib2.urlopen(url).read() dest = file.read() shortname = file.relto(RVMPROF) From pypy.commits at gmail.com Mon Nov 6 05:40:33 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 02:40:33 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: add a comment Message-ID: <5a003c21.831d1c0a.97c7a.9051@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92953:7fb3b80d41b2 Date: 2017-11-06 11:36 +0100 http://bitbucket.org/pypy/pypy/changeset/7fb3b80d41b2/ Log: add a comment diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -10,6 +10,8 @@ def get_list_of_files(shared): files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped files.remove(shared.join('libbacktrace', 'config-x86_32.h')) files.remove(shared.join('libbacktrace', 'config-x86_64.h')) files.remove(shared.join('libbacktrace', 'gstdint.h')) From pypy.commits at gmail.com Mon Nov 6 11:15:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 08:15:42 -0800 (PST) Subject: [pypy-commit] pypy default: Add testrunner/get_info.py script for the buildbot Message-ID: <5a008aae.15981c0a.a4939.77ae@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92955:e68c2a6d0069 Date: 2017-11-06 16:15 +0000 http://bitbucket.org/pypy/pypy/changeset/e68c2a6d0069/ Log: Add testrunner/get_info.py script for the buildbot diff --git a/testrunner/get_info.py b/testrunner/get_info.py new file mode 100644 --- /dev/null +++ b/testrunner/get_info.py @@ -0,0 +1,24 @@ 
+#!/usr/bin/env python +""" +Dump some translation information to stdout as JSON. Used by buildbot. +""" + +import sys +import os +import json + +BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +TARGET_BASENAME = 'pypy-c' + +def make_info_dict(): + target = TARGET_BASENAME + if sys.platform.startswith('win'): + target += '.exe' + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) + return {'target_path': target_path} + +def dump_info(): + return json.dumps(make_info_dict()) + +if __name__ == '__main__': + print dump_info() From pypy.commits at gmail.com Mon Nov 6 11:54:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 08:54:28 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a0093c4.d5301c0a.a92b1.fe09@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92956:ed4ba7032f9d Date: 2017-11-06 16:53 +0000 http://bitbucket.org/pypy/pypy/changeset/ed4ba7032f9d/ Log: hg merge default diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -185,6 +185,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/rpython/jit/metainterp/test/test_del.py b/rpython/jit/metainterp/test/test_del.py --- a/rpython/jit/metainterp/test/test_del.py +++ b/rpython/jit/metainterp/test/test_del.py @@ -82,46 +82,5 @@ assert res == 1 self.check_resops(call_r=1) # for the case B(), but not for the case A() - def test_keepalive(self): - py.test.skip("XXX fails") # hum, I think the test itself is broken - # - mydriver = JitDriver(reds = ['n', 'states'], greens = []) - class State: - num = 1 - class X: - def __init__(self, state): - self.state = state - def __del__(self): - self.state.num += 1 - @dont_look_inside - def do_stuff(): - pass - def f(n): - states = [] - while n > 0: - mydriver.jit_merge_point(n=n, states=states) - state = State() - states.append(state) - x = X(state) - do_stuff() - state.num *= 1000 - do_stuff() - keepalive_until_here(x) - n -= 1 - return states - def main(n): - states = f(n) - rgc.collect() - rgc.collect() - err = 1001 - for state in states: - if state.num != 1001: - err = state.num - print 'ERROR:', err - return err - assert main(20) == 1001 - res = self.meta_interp(main, [20]) - assert res == 1001 - class TestLLtype(DelTests, LLJitMixin): pass diff --git a/testrunner/get_info.py b/testrunner/get_info.py new file mode 100644 --- /dev/null +++ b/testrunner/get_info.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +""" +Dump some translation information to stdout as JSON. Used by buildbot. 
+""" + +import sys +import os +import json + +BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +TARGET_BASENAME = 'pypy-c' + +def make_info_dict(): + target = TARGET_BASENAME + if sys.platform.startswith('win'): + target += '.exe' + target_path = os.path.join(BASE_DIR, 'pypy', 'goal', target) + return {'target_path': target_path} + +def dump_info(): + return json.dumps(make_info_dict()) + +if __name__ == '__main__': + print dump_info() From pypy.commits at gmail.com Mon Nov 6 12:04:51 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 09:04:51 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: one more refactor Message-ID: <5a009633.94ae1c0a.4c38f.806f@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92958:5f1804f818b4 Date: 2017-11-06 18:04 +0100 http://bitbucket.org/pypy/pypy/changeset/5f1804f818b4/ Log: one more refactor diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -60,30 +60,23 @@ assert self.rpy_entry_point() == 0 -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - code = MyCode() + def entry_point(self): + code = self.MyCode() rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) #, gcpolicy="minimark") - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 def test_enable(): From pypy.commits at gmail.com Mon Nov 6 12:04:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Mon, 06 Nov 2017 09:04:49 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: WIP: rvmprof tests are a 90% one the copy of another, but they are a tangled mess. Start to refactor into a more manageable structure Message-ID: <5a009631.95091c0a.dea7.22df@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92957:6847b345ac78 Date: 2017-11-06 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/6847b345ac78/ Log: WIP: rvmprof tests are a 90% one the copy of another, but they are a tangled mess. 
Start to refactor into a more manageable structure diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -7,57 +7,57 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype +class RVMProfTest: -def test_vmprof_execute_code_1(): + class MyCode: pass - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + def setup_method(self, meth): + self.register() + self.rpy_entry_point = compile(self.entry_point, []) - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def register(self): + try: + rvmprof.register_code_object_class(self.MyCode, + lambda code: 'some code') + except rvmprof.VMProfPlatformUnsupported as e: + py.test.skip(str(e)) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) - assert res == 42 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 - - -def test_vmprof_execute_code_2(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass - - class A: - pass - - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() - - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 - - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 def test_register_code(): @@ -82,7 +82,7 @@ return 0 assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") + fn = compile(f, []) #, gcpolicy="minimark") assert fn() == 0 @@ -193,6 +193,7 @@ fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) num = 10000 period = 0.0001 + rvmprof.enable(fd, period, native=1) for i in range(num): res = main(code, 3) diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Mon Nov 6 13:22:41 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 06 Nov 2017 10:22:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix TARGET_BASENAME for pypy3 Message-ID: <5a00a871.26acdf0a.2f304.11e9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92959:585896fe6599 Date: 2017-11-06 18:22 +0000 http://bitbucket.org/pypy/pypy/changeset/585896fe6599/ Log: Fix TARGET_BASENAME for 
pypy3 diff --git a/testrunner/get_info.py b/testrunner/get_info.py --- a/testrunner/get_info.py +++ b/testrunner/get_info.py @@ -8,7 +8,7 @@ import json BASE_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -TARGET_BASENAME = 'pypy-c' +TARGET_BASENAME = 'pypy3-c' def make_info_dict(): target = TARGET_BASENAME From pypy.commits at gmail.com Tue Nov 7 13:39:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 07 Nov 2017 10:39:22 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Bail early in .startswith() and .endswith() if start is past the end of the string. Message-ID: <5a01fdda.46901c0a.9954e.7bc5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92960:1233d5aa782f Date: 2017-11-07 18:38 +0000 http://bitbucket.org/pypy/pypy/changeset/1233d5aa782f/ Log: Bail early in .startswith() and .endswith() if start is past the end of the string. This prevents an overflow, followed by a segfault, in rpython.rlib.rstring.startswith() when start is close to sys.maxint.
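        In plain Python terms, the guard amounts to the sketch below (the helper name safe_startswith is hypothetical; the actual change is the two-line check added to _startswith and _endswith in the diff that follows):

            def safe_startswith(value, prefix, start, end):
                # Bail out before any index arithmetic: if start is already past
                # the end of the string there is nothing to match, and returning
                # False here sidesteps the offset computation in
                # rpython.rlib.rstring.startswith() that can overflow when start
                # is close to sys.maxint.
                if start > len(value):
                    return False
                return value.startswith(prefix, start, end)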
diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py --- a/pypy/objspace/std/stringmethods.py +++ b/pypy/objspace/std/stringmethods.py @@ -628,6 +628,8 @@ def _startswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) + if start > len(value): + return False return startswith(value, prefix, start, end) def descr_endswith(self, space, w_suffix, w_start=None, w_end=None): @@ -653,6 +655,8 @@ def _endswith(self, space, value, w_prefix, start, end): prefix = self._op_val(space, w_prefix) + if start > len(value): + return False return endswith(value, prefix, start, end) def _strip(self, space, w_chars, left, right, name='strip'): From pypy.commits at gmail.com Tue Nov 7 14:12:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 07 Nov 2017 11:12:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove explicit refcount checks from _testcapi Message-ID: <5a02058b.03251c0a.283b7.48cf@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92961:aa87739bdc0a Date: 2017-11-07 19:11 +0000 http://bitbucket.org/pypy/pypy/changeset/aa87739bdc0a/ Log: Remove explicit refcount checks from _testcapi diff --git a/lib_pypy/_testcapimodule.c b/lib_pypy/_testcapimodule.c --- a/lib_pypy/_testcapimodule.c +++ b/lib_pypy/_testcapimodule.c @@ -915,12 +915,6 @@ return -1; } Py_DECREF(res); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in successful " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_INCREF(arg); res = Py_BuildValue(fmt, raise_error, NULL, arg); @@ -930,12 +924,6 @@ return -1; } PyErr_Clear(); - if (Py_REFCNT(arg) != 1) { - PyErr_Format(TestError, "test_buildvalue_N: " - "arg was not decrefed in failed " - "Py_BuildValue(\"%s\")", fmt); - return -1; - } Py_DECREF(arg); return 0; } @@ -958,10 +946,6 @@ return raiseTestError("test_buildvalue_N", "Py_BuildValue(\"N\") returned wrong result"); } - if (Py_REFCNT(arg) != 2) { - return raiseTestError("test_buildvalue_N", - "arg was not decrefed in Py_BuildValue(\"N\")"); - } Py_DECREF(res); Py_DECREF(arg); From pypy.commits at gmail.com Tue Nov 7 19:40:17 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:17 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: apparently, gc='minimark' is not needed for this test. Not sure why it was written like that Message-ID: <5a025271.42da1c0a.356df.ce91@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92962:5cc71a3d3d71 Date: 2017-11-07 12:07 +0100 http://bitbucket.org/pypy/pypy/changeset/5cc71a3d3d71/ Log: apparently, gc='minimark' is not needed for this test. 
Not sure why it was written like that diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -131,7 +131,7 @@ assert f() == 0 assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") + fn = compile(f, []) assert fn() == 0 try: check_profile(tmpfilename) From pypy.commits at gmail.com Tue Nov 7 19:40:19 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:19 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: refactor test_enable to use the new style of testing Message-ID: <5a025273.424a1c0a.62b2c.5865@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92963:1067112a9755 Date: 2017-11-07 15:48 +0100 http://bitbucket.org/pypy/pypy/changeset/1067112a9755/ Log: refactor test_enable to use the new style of testing diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,4 +1,5 @@ import py, os +import pytest from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile @@ -7,18 +8,22 @@ from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype -class RVMProfTest: + at pytest.mark.usefixtures('init') +class RVMProfTest(object): class MyCode: pass - def setup_method(self, meth): + @pytest.fixture + def init(self): self.register() self.rpy_entry_point = compile(self.entry_point, []) def register(self): + def get_name(code): + return 'py:code:52:x' + try: - rvmprof.register_code_object_class(self.MyCode, - lambda code: 'some code') + rvmprof.register_code_object_class(self.MyCode, get_name) except rvmprof.VMProfPlatformUnsupported as e: py.test.skip(str(e)) @@ -79,19 +84,17 @@ assert self.rpy_entry_point() == 0 -def test_enable(): +class TestEnable(RVMProfTest): - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(TestEnable, self).init() - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num s = 0 for i in range(num): @@ -100,15 +103,16 @@ print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def entry_point(self): + def get_name(code): + return 'py:code:52:x' - def f(): if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) - code = MyCode() + code = self.MyCode() rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) if we_are_translated(): num = 100000000 period = 0.0001 @@ -116,28 +120,24 @@ num = 10000 period = 0.9 rvmprof.enable(fd, period) - res = main(code, num) + res = self.main(code, num) #assert res == 499999500000 rvmprof.disable() os.close(fd) return 0 - def check_profile(filename): + def test(self): from vmprof import read_profile - - prof = read_profile(filename) + assert self.entry_point() == 0 + assert 
self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point() == 0 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) assert prof.get_tree().name.startswith("py:") assert prof.get_tree().count - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, []) - assert fn() == 0 - try: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) def test_native(): eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], From pypy.commits at gmail.com Tue Nov 7 19:40:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:26 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: apparently, we don't need this Message-ID: <5a02527a.21b9df0a.93d86.cbd4@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92966:0944d36d3dda Date: 2017-11-07 16:53 +0100 http://bitbucket.org/pypy/pypy/changeset/0944d36d3dda/ Log: apparently, we don't need this diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -3,8 +3,6 @@ from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype @@ -100,9 +98,6 @@ ENTRY_POINT_ARGS = (int, float) def entry_point(self, count, period): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) From pypy.commits at gmail.com Tue Nov 7 19:40:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:22 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: make it possible to specify a name when you create MyCode() Message-ID: <5a025276.05c4df0a.f10f0.76d1@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92964:2cc191d05d43 Date: 2017-11-07 15:54 +0100 http://bitbucket.org/pypy/pypy/changeset/2cc191d05d43/ Log: make it possible to specify a name when you create MyCode() diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -11,7 +11,12 @@ @pytest.mark.usefixtures('init') class RVMProfTest(object): - class MyCode: pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name + + def get_name(self): + return self.name @pytest.fixture def init(self): @@ -19,11 +24,9 @@ self.rpy_entry_point = compile(self.entry_point, []) def register(self): - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(self.MyCode, get_name) + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) except rvmprof.VMProfPlatformUnsupported as e: py.test.skip(str(e)) @@ -104,14 +107,11 @@ return s def entry_point(self): - def get_name(code): - return 'py:code:52:x' - if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) - code = self.MyCode() - rvmprof.register_code(code, get_name) + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, 
self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) if we_are_translated(): num = 100000000 @@ -135,8 +135,9 @@ assert self.rpy_entry_point() == 0 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert tree.count def test_native(): From pypy.commits at gmail.com Tue Nov 7 19:40:28 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:28 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: improve test_enable by: 1) make sure that it runs for approximately 0.5 seconds; 2) check that the number of profiles is what we expect Message-ID: <5a02527c.53d71c0a.3c4c6.f084@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92967:6c26abf30648 Date: 2017-11-08 01:21 +0100 http://bitbucket.org/pypy/pypy/changeset/6c26abf30648/ Log: improve test_enable by: 1) make sure that it runs for approximately 0.5 seconds; 2) check that the number of profiles is what we expect diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,5 +1,6 @@ import py, os import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile @@ -89,6 +90,10 @@ class RVMProfSamplingTest(RVMProfTest): + # the kernel will deliver SIGPROF at max 250 Hz. See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 + @pytest.fixture def init(self, tmpdir): self.tmpdir = tmpdir @@ -97,41 +102,44 @@ super(RVMProfSamplingTest, self).init() ENTRY_POINT_ARGS = (int, float) - def entry_point(self, count, period): + def entry_point(self, value, delta_t): code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - rvmprof.enable(fd, period) - res = self.main(code, count) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) rvmprof.disable() os.close(fd) return res + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff class TestEnable(RVMProfSamplingTest): @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): - print count s = 0 for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s def test(self): from vmprof import read_profile - assert self.entry_point(10**4, 0.9) == 99990000 + assert self.entry_point(10**4, 0.1) == 99990000 assert self.tmpfile.check() self.tmpfile.remove() # - assert self.rpy_entry_point(10**8, 0.0001) == 9999999900000000 + assert self.rpy_entry_point(10**4, 0.5) == 99990000 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) tree = prof.get_tree() assert tree.name == 'py:code:52:test_enable' - assert tree.count + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) def test_native(): From pypy.commits at gmail.com Tue Nov 7 19:40:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:24 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: factor out some reusable logic from TestEnabled, which will be usable also from the upcoming TestNative Message-ID: 
<5a025278.53d71c0a.3c4c6.f07a@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92965:70e2f742d15e Date: 2017-11-07 16:53 +0100 http://bitbucket.org/pypy/pypy/changeset/70e2f742d15e/ Log: factor out some reusable logic from TestEnabled, which will be usable also from the upcoming TestNative diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -11,6 +11,8 @@ @pytest.mark.usefixtures('init') class RVMProfTest(object): + ENTRY_POINT_ARGS = () + class MyCode(object): def __init__(self, name='py:code:0:noname'): self.name = name @@ -21,7 +23,7 @@ @pytest.fixture def init(self): self.register() - self.rpy_entry_point = compile(self.entry_point, []) + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) def register(self): try: @@ -87,52 +89,49 @@ assert self.rpy_entry_point() == 0 -class TestEnable(RVMProfTest): +class RVMProfSamplingTest(RVMProfTest): @pytest.fixture def init(self, tmpdir): self.tmpdir = tmpdir self.tmpfile = tmpdir.join('profile.vmprof') self.tmpfilename = str(self.tmpfile) - super(TestEnable, self).init() + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) - def main(self, code, num): - print num - s = 0 - for i in range(num): - s += (i << 1) - if s % 2123423423 == 0: - print s - return s - - def entry_point(self): + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, count, period): if NonConstant(False): # Hack to give os.open() the correct annotation os.open('foo', 1, 1) code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 - else: - num = 10000 - period = 0.9 rvmprof.enable(fd, period) - res = self.main(code, num) - #assert res == 499999500000 + res = self.main(code, count) rvmprof.disable() os.close(fd) - return 0 + return res + + +class TestEnable(RVMProfSamplingTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): + print count + s = 0 + for i in range(count): + s += (i << 1) + if s % 2123423423 == 0: + print s + return s def test(self): from vmprof import read_profile - assert self.entry_point() == 0 + assert self.entry_point(10**4, 0.9) == 99990000 assert self.tmpfile.check() self.tmpfile.remove() # - assert self.rpy_entry_point() == 0 + assert self.rpy_entry_point(10**8, 0.0001) == 9999999900000000 assert self.tmpfile.check() prof = read_profile(self.tmpfilename) tree = prof.get_tree() From pypy.commits at gmail.com Tue Nov 7 19:40:30 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:40:30 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: rewrite test_native by reusing RVMProfSamplingTest. It still fails, obviously Message-ID: <5a02527e.c6a2df0a.13ea3.b893@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92968:e0fdd6a424df Date: 2017-11-08 01:39 +0100 http://bitbucket.org/pypy/pypy/changeset/e0fdd6a424df/ Log: rewrite test_native by reusing RVMProfSamplingTest. 
It still fails, obviously diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -119,6 +119,7 @@ max_diff = (a+b)/2.0 * tolerance return abs(a-b) < max_diff + class TestEnable(RVMProfSamplingTest): @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) @@ -142,66 +143,44 @@ assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } } + return j; } - return j; - } - """]) + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) + else: + return self.native_func(100) - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -220,12 +199,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - From pypy.commits at gmail.com Tue Nov 7 19:42:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 07 Nov 2017 16:42:40 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: I claim that tests should never be skipped implicitly. If there is some platform on which vmprof doesn't work, buildbot will tell us and we can skip them explicitly. 
Else the risk is to skip tests which are meant to run, as it happened with test_enable and test_native since forever Message-ID: <5a025300.01141c0a.59d4e.19da@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92969:49caf38340af Date: 2017-11-08 01:42 +0100 http://bitbucket.org/pypy/pypy/changeset/49caf38340af/ Log: I claim that tests should never be skipped implicitly. If there is some platform on which vmprof doesn't work, buildbot will tell us and we can skip them explicitly. Else the risk is to skip tests which are meant to run, as it happened with test_enable and test_native since forever diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -25,11 +25,8 @@ self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) def register(self): - try: - rvmprof.register_code_object_class(self.MyCode, - self.MyCode.get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) class TestExecuteCode(RVMProfTest): From pypy.commits at gmail.com Tue Nov 7 21:59:52 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 07 Nov 2017 18:59:52 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Kill dead code, clean up normalization, and disable an assert that causes C code warnings. Its a helper function for _x_divrem and since d is SHIFT - bits_in_digit, which is always SHIFT or smaller already Message-ID: <5a027328.8cabdf0a.5316d.2ab6@mx.google.com> Author: stian Branch: math-improvements Changeset: r92970:7f48dd825978 Date: 2017-11-08 03:59 +0100 http://bitbucket.org/pypy/pypy/changeset/7f48dd825978/ Log: Kill dead code, clean up normalization, and disable an assert that causes C code warnings. Its a helper function for _x_divrem and since d is SHIFT - bits_in_digit, which is always SHIFT or smaller already diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1459,9 +1459,8 @@ i -= 1 assert i > 0 - if i != self.numdigits(): - self.size = i - if self.numdigits() == 1 and self._digits[0] == NULLDIGIT: + self.size = i + if i == 1 and self._digits[0] == NULLDIGIT: self.sign = 0 self._digits = NULLDIGITS @@ -1940,103 +1939,6 @@ ret._normalize() return ret -""" (*) Why adding t3 can't "run out of room" above. - -Let f(x) mean the floor of x and c(x) mean the ceiling of x. Some facts -to start with: - -1. For any integer i, i = c(i/2) + f(i/2). In particular, - bsize = c(bsize/2) + f(bsize/2). -2. shift = f(bsize/2) -3. asize <= bsize -4. Since we call k_lopsided_mul if asize*2 <= bsize, asize*2 > bsize in this - routine, so asize > bsize/2 >= f(bsize/2) in this routine. - -We allocated asize + bsize result digits, and add t3 into them at an offset -of shift. This leaves asize+bsize-shift allocated digit positions for t3 -to fit into, = (by #1 and #2) asize + f(bsize/2) + c(bsize/2) - f(bsize/2) = -asize + c(bsize/2) available digit positions. - -bh has c(bsize/2) digits, and bl at most f(size/2) digits. So bh+hl has -at most c(bsize/2) digits + 1 bit. - -If asize == bsize, ah has c(bsize/2) digits, else ah has at most f(bsize/2) -digits, and al has at most f(bsize/2) digits in any case. So ah+al has at -most (asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 1 bit. - -The product (ah+al)*(bh+bl) therefore has at most - - c(bsize/2) + (asize == bsize ? 
c(bsize/2) : f(bsize/2)) digits + 2 bits - -and we have asize + c(bsize/2) available digit positions. We need to show -this is always enough. An instance of c(bsize/2) cancels out in both, so -the question reduces to whether asize digits is enough to hold -(asize == bsize ? c(bsize/2) : f(bsize/2)) digits + 2 bits. If asize < bsize, -then we're asking whether asize digits >= f(bsize/2) digits + 2 bits. By #4, -asize is at least f(bsize/2)+1 digits, so this in turn reduces to whether 1 -digit is enough to hold 2 bits. This is so since SHIFT=15 >= 2. If -asize == bsize, then we're asking whether bsize digits is enough to hold -c(bsize/2) digits + 2 bits, or equivalently (by #1) whether f(bsize/2) digits -is enough to hold 2 bits. This is so if bsize >= 2, which holds because -bsize >= KARATSUBA_CUTOFF >= 2. - -Note that since there's always enough room for (ah+al)*(bh+bl), and that's -clearly >= each of ah*bh and al*bl, there's always enough room to subtract -ah*bh and al*bl too. -""" - -def _k_lopsided_mul(a, b): - # Not in use anymore, only account for like 1% performance. Perhaps if we - # Got rid of the extra list allocation this would be more effective. - """ - b has at least twice the digits of a, and a is big enough that Karatsuba - would pay off *if* the inputs had balanced sizes. View b as a sequence - of slices, each with a->ob_size digits, and multiply the slices by a, - one at a time. This gives k_mul balanced inputs to work with, and is - also cache-friendly (we compute one double-width slice of the result - at a time, then move on, never bactracking except for the helpful - single-width slice overlap between successive partial sums). - """ - asize = a.numdigits() - bsize = b.numdigits() - # nbdone is # of b digits already multiplied - - assert asize > KARATSUBA_CUTOFF - assert 2 * asize <= bsize - - # Allocate result space, and zero it out. - ret = rbigint([NULLDIGIT] * (asize + bsize), 1) - - # Successive slices of b are copied into bslice. - #bslice = rbigint([0] * asize, 1) - # XXX we cannot pre-allocate, see comments below! - # XXX prevent one list from being created. - bslice = rbigint(sign=1) - - nbdone = 0 - while bsize > 0: - nbtouse = min(bsize, asize) - - # Multiply the next slice of b by a. - - #bslice.digits[:nbtouse] = b.digits[nbdone : nbdone + nbtouse] - # XXX: this would be more efficient if we adopted CPython's - # way to store the size, instead of resizing the list! - # XXX change the implementation, encoding length via the sign. - bslice._digits = b._digits[nbdone : nbdone + nbtouse] - bslice.size = nbtouse - product = _k_mul(a, bslice) - - # Add into result. 
- _v_iadd(ret, nbdone, ret.numdigits() - nbdone, - product, product.numdigits()) - - bsize -= nbtouse - nbdone += nbtouse - - ret._normalize() - return ret - def _inplace_divrem1(pout, pin, n, size=0): """ Divide bigint pin by non-zero digit n, storing quotient @@ -2147,7 +2049,7 @@ """ carry = _unsigned_widen_digit(0) - assert 0 <= d and d < SHIFT + #assert 0 <= d and d < SHIFT i = 0 while i < m: acc = a.uwidedigit(i) << d | carry @@ -2166,7 +2068,7 @@ acc = _unsigned_widen_digit(0) mask = (1 << d) - 1 - assert 0 <= d and d < SHIFT + #assert 0 <= d and d < SHIFT i = m-1 while i >= 0: acc = (carry << SHIFT) | a.uwidedigit(i) From pypy.commits at gmail.com Tue Nov 7 22:02:15 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 07 Nov 2017 19:02:15 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Kill test for removed function Message-ID: <5a0273b7.a8a0df0a.d0ea3.3672@mx.google.com> Author: stian Branch: math-improvements Changeset: r92971:b9cf8efa4db1 Date: 2017-11-08 04:01 +0100 http://bitbucket.org/pypy/pypy/changeset/b9cf8efa4db1/ Log: Kill test for removed function diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -616,7 +616,7 @@ assert res3 == -num << z assert res4 == -num >> z - # Large digit + # Large digit, also invertion test. for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): f1 = rbigint.fromlong(x) nf1 = rbigint.fromlong(-x) @@ -871,14 +871,6 @@ ret = lobj._k_mul(f1, f2) assert ret.tolong() == f1.tolong() * f2.tolong() - def test__k_lopsided_mul(self): - digs_a = KARATSUBA_CUTOFF + 3 - digs_b = 3 * digs_a - f1 = bigint([lobj.MASK] * digs_a, 1) - f2 = bigint([lobj.MASK] * digs_b, 1) - ret = lobj._k_lopsided_mul(f1, f2) - assert ret.tolong() == f1.tolong() * f2.tolong() - def test_longlong(self): max = 1L << (r_longlong.BITS-1) f1 = rbigint.fromlong(max-1) # fits in r_longlong From pypy.commits at gmail.com Wed Nov 8 11:47:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:47:49 -0800 (PST) Subject: [pypy-commit] pypy default: merge the vmprof-0.4.10 branch: Message-ID: <5a033535.82d91c0a.e573f.9d49@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92973:4b7ad9d4be0d Date: 2017-11-08 17:47 +0100 http://bitbucket.org/pypy/pypy/changeset/4b7ad9d4be0d/ Log: merge the vmprof-0.4.10 branch: - copy the recent changes to the C part of vmprof from github - make sure that the tests are actually testing something: so far, most of the were just silently skipped on the nightly buildbot :( - test_native is broken: it has been broken since the merge of vmprof-0.4.8, but we didn't notice - I expect some tests to fail on weird architectures. 
Once we know which, we can explicitly skip them diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + 
namelen); + return 0; +} int vmp_write_all(const char *buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,25 +2,43 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = get_list_of_files(shared) + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,214 +1,183 @@ import py, os +import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype + at pytest.mark.usefixtures('init') +class RVMProfTest(object): -def test_vmprof_execute_code_1(): + ENTRY_POINT_ARGS = () - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def get_name(self): + return self.name + + @pytest.fixture + def init(self): + self.register() + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) + + def register(self): + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) + return 0 + + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): + print num + return 42 + + def entry_point(self): + code = self.MyCode() + rvmprof.register_code(code, lambda code: 'some code') + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 -def test_vmprof_execute_code_2(): +class RVMProfSamplingTest(RVMProfTest): - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + # the kernel will deliver SIGPROF at max 250 Hz. 
See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 - class A: - pass + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, value, delta_t): + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, self.MyCode.get_name) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) + rvmprof.disable() + os.close(fd) + return res - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 +class TestEnable(RVMProfSamplingTest): -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num - return 42 - - def f(): - code = MyCode() - rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) - assert res == 42 - return 0 - - assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - - -def test_enable(): - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): s = 0 - for i in range(num): + for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def test(self): + from vmprof import read_profile + assert self.entry_point(10**4, 0.1) == 99990000 + assert self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point(10**4, 0.5) == 99990000 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 + +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } + } + return j; + } + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) + + @rvmprof.vmprof_execute_code("xcode1", lambda self, 
code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) else: - num = 10000 - period = 0.9 - rvmprof.enable(fd, period) - res = main(code, num) - #assert res == 499999500000 - rvmprof.disable() - os.close(fd) - return 0 + return self.native_func(100) - def check_profile(filename): - from vmprof import read_profile - - prof = read_profile(filename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count - - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; - } - } - return j; - } - """]) - - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. 
from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -227,16 +196,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Wed Nov 8 11:47:47 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:47:47 -0800 (PST) Subject: [pypy-commit] pypy vmprof-0.4.10: close this branch to be merged Message-ID: <5a033533.cc8ddf0a.830c6.4563@mx.google.com> Author: Antonio Cuni Branch: vmprof-0.4.10 Changeset: r92972:1b871922f356 Date: 2017-11-08 17:41 +0100 http://bitbucket.org/pypy/pypy/changeset/1b871922f356/ Log: close this branch to be merged From pypy.commits at gmail.com Wed Nov 8 11:49:48 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 08 Nov 2017 08:49:48 -0800 (PST) Subject: [pypy-commit] pypy default: update vmprof up to github rev c8154361 Message-ID: <5a0335ac.c97e1c0a.2b7ff.b964@mx.google.com> Author: Antonio Cuni Branch: Changeset: r92974:b207c72d71ad Date: 2017-11-08 17:49 +0100 http://bitbucket.org/pypy/pypy/changeset/b207c72d71ad/ Log: update vmprof up to github rev c8154361 diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -28,7 +28,7 @@ #elif __linux__ return "linux"; #elif __FreeBSD__ - return "freebsd" + return "freebsd"; #else #error "Unknown compiler" #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -29,6 +29,7 @@ static int (*unw_is_signal_frame)(unw_cursor_t *) = NULL; static int (*unw_getcontext)(unw_context_t *) = NULL; #else +#define UNW_LOCAL_ONLY #include #endif From pypy.commits at gmail.com Wed Nov 8 13:17:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 08 Nov 2017 10:17:13 -0800 (PST) Subject: [pypy-commit] pypy default: Check behaviour of bytearray as well Message-ID: <5a034a29.88acdf0a.4f669.2b89@mx.google.com> Author: Ronan Lamy Branch: Changeset: r92975:461d62b49f22 Date: 2017-11-08 18:16 +0000 http://bitbucket.org/pypy/pypy/changeset/461d62b49f22/ Log: Check behaviour of bytearray as well diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py --- a/extra_tests/test_bytes.py +++ b/extra_tests/test_bytes.py @@ -1,25 +1,27 @@ from hypothesis import strategies as st from hypothesis import given, example - at given(st.binary(), st.binary(), st.binary()) +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, 
st_bytestring) def test_find(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.find(u) <= len(prefix) assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_index(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.index(u) <= len(prefix) assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rfind(u, prefix, suffix): s = prefix + u + suffix assert s.rfind(u) >= len(prefix) assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rindex(u, prefix, suffix): s = prefix + u + suffix assert s.rindex(u) >= len(prefix) @@ -34,20 +36,20 @@ start = max(start + len(u), 0) return start, end - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_startswith_basic(u, v): assert u.startswith(v) is (u[:len(v)] == v) @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_startswith_start(u, v, start): expected = u[start:].startswith(v) if v else (start <= len(u)) assert u.startswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_startswith_3(u, v, start, end): if v: expected = u[start:end].startswith(v) @@ -56,7 +58,7 @@ expected = start0 <= len(u) and start0 <= end0 assert u.startswith(v, start, end) is expected - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_endswith_basic(u, v): if len(v) > len(u): assert u.endswith(v) is False @@ -65,14 +67,14 @@ @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_endswith_2(u, v, start): expected = u[start:].endswith(v) if v else (start <= len(u)) assert u.endswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_endswith_3(u, v, start, end): if v: expected = u[start:end].endswith(v) From pypy.commits at gmail.com Wed Nov 8 13:34:04 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 10:34:04 -0800 (PST) Subject: [pypy-commit] pypy default: remove unused distutils_platform Message-ID: <5a034e1c.c6a2df0a.13ea3.517e@mx.google.com> Author: Matti Picus Branch: Changeset: r92976:3d3ef332444f Date: 2017-11-08 20:28 +0200 http://bitbucket.org/pypy/pypy/changeset/3d3ef332444f/ Log: remove unused distutils_platform diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -320,9 +320,7 @@ else: host_factory = Cygwin64 else: - # pray - from rpython.translator.platform.distutils_platform import DistutilsPlatform - host_factory = DistutilsPlatform + raise ValueError('unknown sys.platform "%s"', sys.platform) platform = host = host_factory() @@ -335,9 +333,6 @@ elif new_platform == 'arm': from 
rpython.translator.platform.arm import ARM return ARM(cc) - elif new_platform == 'distutils': - from rpython.translator.platform.distutils_platform import DistutilsPlatform - return DistutilsPlatform() else: raise ValueError("platform = %s" % (new_platform,)) diff --git a/rpython/translator/platform/distutils_platform.py b/rpython/translator/platform/distutils_platform.py deleted file mode 100644 --- a/rpython/translator/platform/distutils_platform.py +++ /dev/null @@ -1,157 +0,0 @@ -import py, os, sys - -from rpython.translator.platform import Platform, log, CompilationError -from rpython.translator.tool import stdoutcapture - -def log_spawned_cmd(spawn): - def spawn_and_log(cmd, *args, **kwds): - log.execute(' '.join(cmd)) - return spawn(cmd, *args, **kwds) - return spawn_and_log - -CFLAGS = ['-O3'] - -if os.name != 'nt': - so_ext = 'so' -else: - so_ext = 'dll' - -class DistutilsPlatform(Platform): - """ This is a generic distutils platform. I hope it'll go away at some - point soon completely - """ - name = "distutils" - so_ext = so_ext - - def __init__(self, cc=None): - self.cc = cc - if self.name == "distutils": - self.name = sys.platform - - def _ensure_correct_math(self): - if self.name != 'win32': - return # so far - from distutils import sysconfig - gcv = sysconfig.get_config_vars() - opt = gcv.get('OPT') # not always existent - if opt and '/Op' not in opt: - opt += '/Op' - gcv['OPT'] = opt - - def compile(self, cfilenames, eci, outputfilename=None, standalone=True): - self._ensure_correct_math() - self.cfilenames = cfilenames - if standalone: - ext = '' - else: - ext = so_ext - self.standalone = standalone - self.libraries = list(eci.libraries) - self.include_dirs = list(eci.include_dirs) - self.library_dirs = list(eci.library_dirs) - self.compile_extra = list(eci.compile_extra) - self.link_extra = list(eci.link_extra) - self.frameworks = list(eci.frameworks) - if not self.name in ('win32', 'darwin', 'cygwin'): # xxx - if 'm' not in self.libraries: - self.libraries.append('m') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - if 'pthread' not in self.libraries: - self.libraries.append('pthread') - if self.name != 'sunos5': - self.compile_extra += ['-pthread'] - self.link_extra += ['-pthread'] - else: - self.compile_extra += ['-pthreads'] - self.link_extra += ['-lpthread'] - if self.name == 'win32': - self.link_extra += ['/DEBUG'] # generate .pdb file - if self.name == 'darwin': - # support Fink & Darwinports - for s in ('/sw/', '/opt/local/'): - if s + 'include' not in self.include_dirs and \ - os.path.exists(s + 'include'): - self.include_dirs.append(s + 'include') - if s + 'lib' not in self.library_dirs and \ - os.path.exists(s + 'lib'): - self.library_dirs.append(s + 'lib') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - for framework in self.frameworks: - self.link_extra += ['-framework', framework] - - if outputfilename is None: - self.outputfilename = py.path.local(cfilenames[0]).new(ext=ext) - else: - self.outputfilename = py.path.local(outputfilename) - self.eci = eci - import distutils.errors - basename = self.outputfilename.new(ext='') - data = '' - try: - saved_environ = os.environ.copy() - c = stdoutcapture.Capture(mixed_out_err=True) - try: - self._build() - finally: - # workaround for a distutils bugs where some env vars can - # become longer and longer every time it is used - for key, value in saved_environ.items(): - if os.environ.get(key) != value: - os.environ[key] = value - foutput, foutput = c.done() - data = foutput.read() - if data: - 
fdump = basename.new(ext='errors').open("wb") - fdump.write(data) - fdump.close() - except (distutils.errors.CompileError, - distutils.errors.LinkError): - raise CompilationError('', data) - except: - print >>sys.stderr, data - raise - return self.outputfilename - - def _build(self): - from distutils.ccompiler import new_compiler - from distutils import sysconfig - compiler = new_compiler(force=1) - if self.cc is not None: - for c in '''compiler compiler_so compiler_cxx - linker_exe linker_so'''.split(): - compiler.executables[c][0] = self.cc - if not self.standalone: - sysconfig.customize_compiler(compiler) # XXX - compiler.spawn = log_spawned_cmd(compiler.spawn) - objects = [] - for cfile in self.cfilenames: - cfile = py.path.local(cfile) - compile_extra = self.compile_extra[:] - - old = cfile.dirpath().chdir() - try: - res = compiler.compile([cfile.basename], - include_dirs=self.eci.include_dirs, - extra_preargs=compile_extra) - assert len(res) == 1 - cobjfile = py.path.local(res[0]) - assert cobjfile.check() - objects.append(str(cobjfile)) - finally: - old.chdir() - - if self.standalone: - cmd = compiler.link_executable - else: - cmd = compiler.link_shared_object - cmd(objects, str(self.outputfilename), - libraries=self.eci.libraries, - extra_preargs=self.link_extra, - library_dirs=self.eci.library_dirs) - - def _include_dirs_for_libffi(self): - return ['/usr/include/libffi'] - - def _library_dirs_for_libffi(self): - return ['/usr/lib/libffi'] - diff --git a/rpython/translator/platform/test/test_distutils.py b/rpython/translator/platform/test/test_distutils.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_distutils.py +++ /dev/null @@ -1,17 +0,0 @@ - -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.platform.distutils_platform import DistutilsPlatform -import py - -class TestDistutils(BasicTest): - platform = DistutilsPlatform() - - def test_nice_errors(self): - py.test.skip("Unsupported") - - def test_900_files(self): - py.test.skip('Makefiles not suppoerted') - - def test_precompiled_headers(self): - py.test.skip('Makefiles not suppoerted') - From pypy.commits at gmail.com Wed Nov 8 15:00:34 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 12:00:34 -0800 (PST) Subject: [pypy-commit] pypy default: remove maemo platform Message-ID: <5a036262.21b9df0a.93d86.9fb8@mx.google.com> Author: Matti Picus Branch: Changeset: r92977:73ab8f585ba4 Date: 2017-11-08 21:59 +0200 http://bitbucket.org/pypy/pypy/changeset/73ab8f585ba4/ Log: remove maemo platform diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -39,9 +39,7 @@ CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) PLATFORMS = [ - 'maemo', 'host', - 'distutils', 'arm', ] diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -1102,22 +1102,6 @@ assert out.strip() == 'ok' -class TestMaemo(TestStandalone): - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - from rpython.translator.platform.maemo import check_scratchbox - check_scratchbox() - config = get_combined_translation_config(translating=True) - config.translation.platform = 'maemo' - cls.config = config - - def test_profopt(self): - py.test.skip("Unsupported") - - def 
test_prof_inline(self): - py.test.skip("Unsupported") - - class TestThread(object): gcrootfinder = 'shadowstack' config = None diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -327,9 +327,6 @@ def pick_platform(new_platform, cc): if new_platform == 'host': return host_factory(cc) - elif new_platform == 'maemo': - from rpython.translator.platform.maemo import Maemo - return Maemo(cc) elif new_platform == 'arm': from rpython.translator.platform.arm import ARM return ARM(cc) diff --git a/rpython/translator/platform/maemo.py b/rpython/translator/platform/maemo.py deleted file mode 100644 --- a/rpython/translator/platform/maemo.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Support for Maemo.""" - -import py, os - -from rpython.tool.udir import udir -from rpython.translator.platform import ExecutionResult, log -from rpython.translator.platform.linux import Linux -from rpython.translator.platform.posix import GnuMakefile, _run_subprocess - -def check_scratchbox(): - # in order to work, that file must exist and be executable by us - if not os.access('/scratchbox/login', os.X_OK): - py.test.skip("No scratchbox detected") - -class Maemo(Linux): - name = "maemo" - - available_includedirs = ('/usr/include', '/tmp') - copied_cache = {} - - def _invent_new_name(self, basepath, base): - pth = basepath.join(base) - num = 0 - while pth.check(): - pth = basepath.join('%s_%d' % (base,num)) - num += 1 - return pth.ensure(dir=1) - - def _copy_files_to_new_dir(self, dir_from, pattern='*.[ch]'): - try: - return self.copied_cache[dir_from] - except KeyError: - new_dirpath = self._invent_new_name(udir, 'copied_includes') - files = py.path.local(dir_from).listdir(pattern) - for f in files: - f.copy(new_dirpath) - # XXX - srcdir = py.path.local(dir_from).join('src') - if srcdir.check(dir=1): - target = new_dirpath.join('src').ensure(dir=1) - for f in srcdir.listdir(pattern): - f.copy(target) - # XXX - self.copied_cache[dir_from] = new_dirpath - return new_dirpath - - def _preprocess_include_dirs(self, include_dirs): - """ Tweak includedirs so they'll be available through scratchbox - """ - res_incl_dirs = [] - for incl_dir in include_dirs: - incl_dir = py.path.local(incl_dir) - for available in self.available_includedirs: - if incl_dir.relto(available): - res_incl_dirs.append(str(incl_dir)) - break - else: - # we need to copy files to a place where it's accessible - res_incl_dirs.append(self._copy_files_to_new_dir(incl_dir)) - return res_incl_dirs - - def _execute_c_compiler(self, cc, args, outname): - log.execute('/scratchbox/login ' + cc + ' ' + ' '.join(args)) - args = [cc] + args - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args) - self._handle_error(returncode, stdout, stderr, outname) - - def execute(self, executable, args=[], env=None): - if isinstance(args, str): - args = str(executable) + ' ' + args - log.message('executing /scratchbox/login ' + args) - else: - args = [str(executable)] + args - log.message('executing /scratchbox/login ' + ' '.join(args)) - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args, - env) - return ExecutionResult(returncode, stdout, stderr) - - def _include_dirs_for_libffi(self): - # insanely obscure dir - return ['/usr/include/arm-linux-gnueabi/'] - - def _library_dirs_for_libffi(self): - # on the other hand, library lands in usual place... 
- return [] - - def execute_makefile(self, path_to_makefile, extra_opts=[]): - if isinstance(path_to_makefile, GnuMakefile): - path = path_to_makefile.makefile_dir - else: - path = path_to_makefile - log.execute('make %s in %s' % (" ".join(extra_opts), path)) - returncode, stdout, stderr = _run_subprocess( - '/scratchbox/login', ['make', '-C', str(path)] + extra_opts) - self._handle_error(returncode, stdout, stderr, path.join('make')) diff --git a/rpython/translator/platform/test/test_maemo.py b/rpython/translator/platform/test/test_maemo.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_maemo.py +++ /dev/null @@ -1,37 +0,0 @@ - -""" File containing maemo platform tests -""" - -import py -from rpython.tool.udir import udir -from rpython.translator.platform.maemo import Maemo, check_scratchbox -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.tool.cbuild import ExternalCompilationInfo - -class TestMaemo(BasicTest): - platform = Maemo() - strict_on_stderr = False - - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - check_scratchbox() - - def test_includes_outside_scratchbox(self): - cfile = udir.join('test_includes_outside_scratchbox.c') - cfile.write(''' - #include - #include "test.h" - int main() - { - printf("%d\\n", XXX_STUFF); - return 0; - } - ''') - includedir = py.path.local(__file__).dirpath().join('include') - eci = ExternalCompilationInfo(include_dirs=(includedir,)) - executable = self.platform.compile([cfile], eci) - res = self.platform.execute(executable) - self.check_res(res) - - def test_environment_inheritance(self): - py.test.skip("FIXME") diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -151,6 +151,6 @@ assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('maemo', None) + platform.set_platform('arm', None) assert platform.host != platform.platform assert not platform.is_host_build() From pypy.commits at gmail.com Wed Nov 8 15:07:33 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 12:07:33 -0800 (PST) Subject: [pypy-commit] pypy default: the only possible non-host platform is arm, not sure it works Message-ID: <5a036405.178fdf0a.93bfd.18dd@mx.google.com> Author: Matti Picus Branch: Changeset: r92978:7ba4c7f12fd5 Date: 2017-11-08 22:06 +0200 http://bitbucket.org/pypy/pypy/changeset/7ba4c7f12fd5/ Log: the only possible non-host platform is arm, not sure it works diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -147,10 +147,13 @@ def test_is_host_build(): + from platform import machine from rpython.translator import platform assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('arm', None) - assert platform.host != platform.platform - assert not platform.is_host_build() + # do we support non-host builds? 
+ if machine().startswith('arm'): + platform.set_platform('arm', None) + assert platform.host != platform.platform + assert not platform.is_host_build() From pypy.commits at gmail.com Wed Nov 8 16:14:42 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 13:14:42 -0800 (PST) Subject: [pypy-commit] pypy vmprof-win32: close outdated branch Message-ID: <5a0373c2.d5301c0a.234af.66a6@mx.google.com> Author: Matti Picus Branch: vmprof-win32 Changeset: r92979:7aada6f7b5bb Date: 2017-11-08 23:01 +0200 http://bitbucket.org/pypy/pypy/changeset/7aada6f7b5bb/ Log: close outdated branch From pypy.commits at gmail.com Wed Nov 8 16:14:44 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 08 Nov 2017 13:14:44 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: start to run tests on win32 Message-ID: <5a0373c4.8cabdf0a.5316d.aae7@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92980:edb8f85891e5 Date: 2017-11-08 23:17 +0200 http://bitbucket.org/pypy/pypy/changeset/edb8f85891e5/ Log: start to run tests on win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform.startswith('linux'): + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): diff --git a/rpython/rlib/rvmprof/test/__init__.py b/rpython/rlib/rvmprof/test/__init__.py --- a/rpython/rlib/rvmprof/test/__init__.py +++ b/rpython/rlib/rvmprof/test/__init__.py @@ -1,5 +0,0 @@ -import pytest -import platform - -if not platform.machine().startswith('x86'): - pytest.skip() diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -25,8 +25,9 @@ print for file in files: path = file.relto(shared) + path = path.replace(os.sep, '/') url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) - source = urllib2.urlopen(url).read() + source = urllib2.urlopen(url).read().replace('\r\n', '\n') dest = file.read() shortname = file.relto(RVMPROF) if source == dest: From pypy.commits at gmail.com Wed Nov 8 20:55:39 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 08 Nov 2017 17:55:39 -0800 (PST) Subject: [pypy-commit] pypy default: "eh". On pypy we need to be careful in which order we have pendingblocks. Message-ID: <5a03b59b.c39cdf0a.b72ee.bc53@mx.google.com> Author: fijal Branch: Changeset: r92981:cb9634421fa2 Date: 2017-11-08 17:54 -0800 http://bitbucket.org/pypy/pypy/changeset/cb9634421fa2/ Log: "eh". On pypy we need to be careful in which order we have pendingblocks. Otherwise we end up in a setup where we have blocks a, b and c where a and b are blocked because c needs to add an attribute, but c is never appended since popitem() would always return an a or b. I wonder if the same condition can be repeated on CPython, but I cannot. Unclear how would you write a test for it since it depends on dictionary order. 
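A minimal sketch of the ordering property described in the log above, assuming plain CPython dict/deque semantics; the class and variable names here are illustrative, not the ones used in the changeset below. It shows how a FIFO-ordered pending map guarantees that an entry which is re-added after blocking goes to the back of the queue, so an older pending entry (such as block 'c' in the scenario above) is eventually popped instead of being starved:

    from collections import deque

    class FIFOPending(object):
        # illustrative stand-in for a FIFO-ordered {block: graph} mapping
        def __init__(self):
            self._d = {}
            self._order = deque()
        def __setitem__(self, key, value):
            if key not in self._d:       # a key that is already pending keeps its position
                self._order.append(key)
            self._d[key] = value
        def popitem(self):
            key = self._order.popleft()  # always hand out the oldest pending entry first
            return key, self._d.pop(key)
        def __nonzero__(self):           # Python 2 truth test, as in the annotator code base
            return bool(self._d)

    pending = FIFOPending()
    pending['a'] = 'graph-a'; pending['b'] = 'graph-b'; pending['c'] = 'graph-c'
    k, _ = pending.popitem()             # -> 'a'
    pending['a'] = 'graph-a'             # 'a' blocks again and is re-queued at the back
    k, _ = pending.popitem()             # -> 'b'
    k, _ = pending.popitem()             # -> 'c': 'c' cannot be starved by 'a' and 'b'

With an unordered mapping, popitem() may keep returning whichever re-inserted keys happen to hash first, which is the starvation the changeset below works around.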
diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,10 +15,34 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations +from collections import deque log = AnsiLogger("annrpython") +class ShuffleDict(object): + def __init__(self): + self._d = {} + self.keys = deque() + + def __setitem__(self, k, v): + if k in self._d: + self._d[k] = v + else: + self._d[k] = v + self.keys.append(k) + + def __getitem__(self, k): + return self._d[k] + + def popitem(self): + key = self.keys.popleft() + item = self._d.pop(key) + return (key, item) + + def __nonzero__(self): + return bool(self._d) + class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -33,7 +57,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed From pypy.commits at gmail.com Thu Nov 9 07:38:27 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 04:38:27 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove unused variable and make these size calculations unsigned Message-ID: <5a044c43.d2addf0a.ed1d0.1282@mx.google.com> Author: stian Branch: math-improvements Changeset: r92982:1a7dc37b2d5d Date: 2017-11-09 13:37 +0100 http://bitbucket.org/pypy/pypy/changeset/1a7dc37b2d5d/ Log: Remove unused variable and make these size calculations unsigned diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -848,14 +848,14 @@ mod = self.int_and_(digit - 1) else: # Perform - size = self.numdigits() - 1 + size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: rem = self.widedigit(size) - size -= 1 - while size >= 0: + while size > 0: + size -= 1 rem = ((rem << SHIFT) | self.digit(size)) % digit - size -= 1 + else: rem = self.widedigit(0) % digit @@ -890,13 +890,13 @@ mod = self.int_and_(digit - 1) else: # Perform - size = self.numdigits() - 1 + size = UDIGIT_TYPE(self.numdigits() - 1) + if size > 0: rem = self.widedigit(size) - size -= 1 - while size >= 0: + while size > 0: + size -= 1 rem = ((rem << SHIFT) | self.digit(size)) % digit - size -= 1 else: rem = self.digit(0) % digit @@ -981,7 +981,7 @@ # XXX failed to implement raise ValueError("bigint pow() too negative") - size_b = b.numdigits() + size_b = UDIGIT_TYPE(b.numdigits()) if b.sign == 0: return ONERBIGINT @@ -1040,8 +1040,9 @@ if size_b <= FIVEARY_CUTOFF: # Left-to-right binary exponentiation (HAC Algorithm 14.79) # http://www.cacr.math.uwaterloo.ca/hac/about/chap14.pdf - size_b -= 1 - while size_b >= 0: + + while size_b > 0: + size_b -= 1 bi = b.digit(size_b) j = 1 << (SHIFT-1) while j != 0: @@ -1049,7 +1050,7 @@ if bi & j: z = _help_mult(z, a, c) j >>= 1 - size_b -= 1 + else: # Left-to-right 5-ary exponentiation (HAC Algorithm 14.82) @@ -1328,7 +1329,7 @@ hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 - inverted = False + while i < newsize: digit = self.udigit(wordshift) if invert and i == 0 and wordshift == 0: From pypy.commits at 
gmail.com Thu Nov 9 13:09:26 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 10:09:26 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Dont need widedigit | widedigit, when widedigit | digit will do. Message-ID: <5a0499d6.05ac1c0a.9ad53.facb@mx.google.com> Author: stian Branch: math-improvements Changeset: r92983:5c8e47fa96a6 Date: 2017-11-09 19:08 +0100 http://bitbucket.org/pypy/pypy/changeset/5c8e47fa96a6/ Log: Dont need widedigit | widedigit, when widedigit | digit will do. diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2049,7 +2049,7 @@ * result in z[0:m], and return the d bits shifted out of the top. """ - carry = _unsigned_widen_digit(0) + carry = 0 #assert 0 <= d and d < SHIFT i = 0 while i < m: @@ -2072,7 +2072,7 @@ #assert 0 <= d and d < SHIFT i = m-1 while i >= 0: - acc = (carry << SHIFT) | a.uwidedigit(i) + acc = (carry << SHIFT) | a.udigit(i) carry = acc & mask z.setdigit(i, acc >> d) i -= 1 @@ -2127,10 +2127,10 @@ else: vtop = v.widedigit(j) << SHIFT #assert vtop <= wm1 - vv = vtop | v.widedigit(abs(j-1)) + vv = vtop | v.digit(abs(j-1)) q = vv / wm1 r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. - vj2 = v.widedigit(abs(j-2)) + vj2 = v.digit(abs(j-2)) while wm2 * q > ((r << SHIFT) | vj2): q -= 1 r += wm1 diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -144,7 +144,7 @@ rl_op2 = rbigint.fromlong(op2) r1 = rl_op1.mod(rl_op2) r2 = op1 % op2 - print op1, op2 + assert r1.tolong() == r2 def test_int_mod(self): From pypy.commits at gmail.com Thu Nov 9 13:12:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 09 Nov 2017 10:12:56 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement __text_signature__ on types Message-ID: <5a049aa8.14a1df0a.ca2b9.3267@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92984:ffe57298623b Date: 2017-11-09 18:12 +0000 http://bitbucket.org/pypy/pypy/changeset/ffe57298623b/ Log: Implement __text_signature__ on types diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -16,7 +16,7 @@ @not_rpython def __init__(self, __name, __base=None, __total_ordering__=None, __buffer=None, __confirm_applevel_del__=False, - variable_sized=False, **rawdict): + _text_signature_=None, variable_sized=False, **rawdict): "initialization-time only" self.name = __name if __base is None: @@ -36,6 +36,7 @@ assert '__del__' not in rawdict self.weakrefable = '__weakref__' in rawdict self.doc = rawdict.get('__doc__', None) + self.text_signature = _text_signature_ for base in bases: self.hasdict |= base.hasdict self.weakrefable |= base.weakrefable diff --git a/pypy/objspace/std/objectobject.py b/pypy/objspace/std/objectobject.py --- a/pypy/objspace/std/objectobject.py +++ b/pypy/objspace/std/objectobject.py @@ -280,6 +280,7 @@ return space.call_function(space.w_list, _objectdir(space, w_obj)) W_ObjectObject.typedef = TypeDef("object", + _text_signature_='()', __doc__ = "The most base type", __new__ = interp2app(descr__new__), __subclasshook__ = interp2app(descr___subclasshook__, as_classmethod=True), diff --git a/pypy/objspace/std/test/test_typeobject.py b/pypy/objspace/std/test/test_typeobject.py --- a/pypy/objspace/std/test/test_typeobject.py +++ b/pypy/objspace/std/test/test_typeobject.py @@ -543,6 +543,13 @@ 
type(X).__dict__["__doc__"].__delete__(X)) assert X.__doc__ == "banana" + def test_text_signature(self): + assert object.__text_signature__ == '()' + + class A: + pass + assert A.__text_signature__ is None + def test_metaclass_conflict(self): """ class T1(type): diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -185,6 +185,7 @@ self.hasuserdel = False self.weakrefable = False self.w_doc = space.w_None + self.text_signature = None self.weak_subclasses = [] self.flag_heaptype = is_heaptype self.flag_abstract = False @@ -975,6 +976,11 @@ raise oefmt(space.w_TypeError, "can't set %N.__doc__", w_type) w_type.setdictvalue(space, '__doc__', w_value) +def type_get_txtsig(space, w_type): + if w_type.text_signature is None: + return space.w_None + return space.newtext(w_type.text_signature) + def descr__dir(space, w_type): from pypy.objspace.std.util import _classdir return space.call_function(space.w_list, _classdir(space, w_type)) @@ -1062,6 +1068,7 @@ __mro__ = GetSetProperty(descr_get__mro__), __dict__=GetSetProperty(type_get_dict), __doc__ = GetSetProperty(descr__doc, descr_set__doc, cls=W_TypeObject, name='__doc__'), + __text_signature__=GetSetProperty(type_get_txtsig), __dir__ = gateway.interp2app(descr__dir), mro = gateway.interp2app(descr_mro), __flags__ = GetSetProperty(descr__flags), @@ -1271,6 +1278,7 @@ else: w_doc = w_self.space.newtext_or_none(instancetypedef.doc) w_self.w_doc = w_doc + w_self.text_signature = instancetypedef.text_signature ensure_common_attributes(w_self) # # usually 'instancetypedef' is new, i.e. not seen in any base, From pypy.commits at gmail.com Thu Nov 9 13:59:44 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 10:59:44 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: wip - shared files must be fixed upstream then pulled into here Message-ID: <5a04a5a0.09a0df0a.5b3a0.b704@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92985:e68720efe25c Date: 2017-11-09 19:57 +0200 http://bitbucket.org/pypy/pypy/changeset/e68720efe25c/ Log: wip - shared files must be fixed upstream then pulled into here diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -6,10 +6,8 @@ #define SINGLE_BUF_SIZE (8192 - 2 * sizeof(unsigned int)) #ifdef VMPROF_WINDOWS -#include -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -typedef intptr_t ssize_t; +#include "shared/msiinttypes/inttypes.h" +#include "shared/msiinttypes/stdint.h" #else #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,4 +1,8 @@ #include "vmprof_win.h" +#ifdef RPYTHON_VMPROF +#include "common_header.h" +#include "structdef.h" /* for struct pypy_threadlocal_s */ +#endif volatile int thread_started = 0; volatile int enabled = 0; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -1,10 +1,9 @@ #pragma once +#include "compat.h" #include "windows.h" -#include "compat.h" #include "vmp_stack.h" #include - int prepare_concurrent_bufs(void); // This file has been inspired (but not copied from since the LICENSE diff --git 
a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,9 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + compile_flags = [] + #compile_flags = ['-g', '-O0'] + eci = ExternalCompilationInfo(compile_extra=compile_flags, separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Thu Nov 9 13:59:46 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 10:59:46 -0800 (PST) Subject: [pypy-commit] pypy win32-vmprof: call get_ident to register thread_ident in pypy_threadlocal_s Message-ID: <5a04a5a2.3bb0df0a.47892.d6ec@mx.google.com> Author: Matti Picus Branch: win32-vmprof Changeset: r92986:351273f6cab2 Date: 2017-11-09 19:58 +0200 http://bitbucket.org/pypy/pypy/changeset/351273f6cab2/ Log: call get_ident to register thread_ident in pypy_threadlocal_s diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -100,6 +100,8 @@ ENTRY_POINT_ARGS = (int, float) def entry_point(self, value, delta_t): + from rpython.rlib.rthread import get_ident + get_ident() # register thread_ident for win32 code = self.MyCode('py:code:52:test_enable') rvmprof.register_code(code, self.MyCode.get_name) fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) From pypy.commits at gmail.com Thu Nov 9 15:20:40 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 12:20:40 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Merge default Message-ID: <5a04b898.14a1df0a.ca2b9.3f7f@mx.google.com> Author: stian Branch: math-improvements Changeset: r92987:92d38b4c73a2 Date: 2017-11-09 19:16 +0100 http://bitbucket.org/pypy/pypy/changeset/92d38b4c73a2/ Log: Merge default diff too long, truncating to 2000 out of 6320 lines diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at 
given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, 
len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % 
(val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + 
def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. 
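The _ctypes hunks above (lib_pypy/_ctypes/pointer.py and structure.py) change what pointer item assignment does for composite values: instead of writing a single word taken from the source buffer, the target address is computed as base + index * sizeof(type) and the object's full contents are copied there via _copy_to()/memmove. A minimal sketch of the user-visible behaviour this is meant to support, written against the public ctypes API only; the Point/arr/p names are illustrative and not part of the patch:

    from ctypes import Structure, POINTER, cast, c_int

    class Point(Structure):
        _fields_ = [("x", c_int), ("y", c_int)]

    arr = (Point * 2)()                # backing storage for two Points
    p = cast(arr, POINTER(Point))      # pointer to the first element
    p[1] = Point(3, 4)                 # assignment must copy both fields,
                                       # not just the first machine word
    assert (arr[1].x, arr[1].y) == (3, 4)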
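Similarly, the _sqlite3 hunk above reworks statement-type detection so the first whitespace-delimited keyword is only examined when the SQL text is non-empty, and any unrecognised keyword falls through to the "other" type. A rough standalone sketch of that first-keyword dispatch; the names below are placeholders rather than the module's actual _STMT_TYPE_* constants, and whitespace-only input is folded into the invalid case here for simplicity:

    # Placeholder mapping; the real module stores integer _STMT_TYPE_* values.
    _KEYWORD_TO_TYPE = {
        "SELECT": "select", "INSERT": "insert", "UPDATE": "update",
        "DELETE": "delete", "REPLACE": "replace",
    }

    def classify_statement(sql):
        words = sql.lstrip().split()
        if not words:                  # empty (or whitespace-only) SQL
            return "invalid"
        return _KEYWORD_TO_TYPE.get(words[0].upper(), "other")

    assert classify_statement("SELECT * FROM t") == "select"
    assert classify_statement("") == "invalid"
    assert classify_statement("PRAGMA foo") == "other"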
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,13 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. 
branch: run-extra-tests +Run extra_tests/ in buildbot diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -290,66 +290,87 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet # - def g(c): + def bar(c): c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', 
'_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. 
-def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if 
capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = 
capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import 
FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - 
self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of 
converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - 
do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): + """Takes an address and a bound C++ class proxy, returns a bound instance.""" + w_clsdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + if not w_clsdecl: + w_clsdecl = scope_byname(space, space.text_w(w_pycppclass)) + if not w_clsdecl: raise oefmt(space.w_TypeError, "no such class: %s", space.text_w(w_pycppclass)) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) - return wrap_cppobject(space, rawobject, cppclass, do_cast=cast, python_owns=owns) + return _bind_object(space, w_obj, w_clsdecl, owns, cast) + +def move(space, w_obj): + """Casts the given instance into an C++-style rvalue.""" + obj = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + if obj: + obj.flags |= INSTANCE_FLAGS_IS_R_VALUE + return w_obj diff --git a/pypy/module/_cppyy/pythonify.py b/pypy/module/_cppyy/pythonify.py --- a/pypy/module/_cppyy/pythonify.py +++ b/pypy/module/_cppyy/pythonify.py @@ -10,7 +10,7 @@ class CPPMetaScope(type): def 
__getattr__(self, name): try: - return get_pycppitem(self, name) # will cache on self + return get_scoped_pycppitem(self, name) # will cache on self except Exception as e: raise AttributeError("%s object has no attribute '%s' (details: %s)" % (self, name, str(e))) @@ -36,11 +36,14 @@ self._scope = scope def _arg_to_str(self, arg): - if arg == str: - import _cppyy - arg = _cppyy._std_string_name() - elif type(arg) != str: - arg = arg.__name__ + try: + arg = arg.__cppname__ + except AttributeError: + if arg == str: + import _cppyy + arg = _cppyy._std_string_name() + elif type(arg) != str: + arg = arg.__name__ return arg def __call__(self, *args): @@ -58,8 +61,36 @@ return self.__call__(*args) -def clgen_callback(name): - return get_pycppclass(name) +def scope_splitter(name): + is_open_template, scope = 0, "" + for c in name: + if c == ':' and not is_open_template: + if scope: + yield scope + scope = "" + continue + elif c == '<': + is_open_template += 1 + elif c == '>': + is_open_template -= 1 + scope += c + yield scope + +def get_pycppitem(final_scoped_name): + # walk scopes recursively down from global namespace ("::") to get the + # actual (i.e. not typedef'ed) class, triggering all necessary creation + scope = gbl + for name in scope_splitter(final_scoped_name): + scope = getattr(scope, name) + return scope +get_pycppclass = get_pycppitem # currently no distinction, but might + # in future for performance + + +# callbacks (originating from interp_cppyy.py) to allow interp-level to +# initiate creation of app-level classes and function +def clgen_callback(final_scoped_name): + return get_pycppclass(final_scoped_name) def fngen_callback(func, npar): # todo, some kind of arg transform spec if npar == 0: @@ -75,20 +106,19 @@ return wrapper +# construction of namespaces and classes, and their helpers +def make_module_name(scope): + if scope: + return scope.__module__ + '.' + scope.__name__ + return 'cppyy' + def make_static_function(func_name, cppol): def function(*args): return cppol.call(None, *args) function.__name__ = func_name - function.__doc__ = cppol.signature() + function.__doc__ = cppol.prototype() return staticmethod(function) -def make_method(meth_name, cppol): - def method(self, *args): - return cppol.call(self, *args) - method.__name__ = meth_name - method.__doc__ = cppol.signature() - return method - def make_cppnamespace(scope, name, decl): # build up a representation of a C++ namespace (namespaces are classes) @@ -98,20 +128,19 @@ ns_meta = type(name+'_meta', (CPPMetaNamespace,), {}) # create the python-side C++ namespace representation, cache in scope if given - d = {"__cppdecl__" : decl, "__cppname__" : decl.__cppname__ } + d = {"__cppdecl__" : decl, + "__module__" : make_module_name(scope), + "__cppname__" : decl.__cppname__ } pyns = ns_meta(name, (CPPNamespace,), d) if scope: setattr(scope, name, pyns) # install as modules to allow importing from (note naming: cppyy) - modname = 'cppyy.gbl' - if scope: - modname = 'cppyy.gbl.'+pyns.__cppname__.replace('::', '.') - sys.modules[modname] = pyns + sys.modules[make_module_name(pyns)] = pyns return pyns def _drop_cycles(bases): - # TODO: figure this out, as it seems to be a PyPy bug?! + # TODO: figure out why this is necessary? 
for b1 in bases: for b2 in bases: if not (b1 is b2) and issubclass(b2, b1): @@ -119,27 +148,37 @@ break return tuple(bases) -def make_new(class_name): + +def make_new(decl): def __new__(cls, *args): # create a place-holder only as there may be a derived class defined + # TODO: get rid of the import and add user-land bind_object that uses + # _bind_object (see interp_cppyy.py) import _cppyy - instance = _cppyy.bind_object(0, class_name, True) + instance = _cppyy._bind_object(0, decl, True) if not instance.__class__ is cls: instance.__class__ = cls # happens for derived class return instance return __new__ -def make_cppclass(scope, class_name, final_class_name, decl): +def make_method(meth_name, cppol): + def method(self, *args): + return cppol.call(self, *args) + method.__name__ = meth_name + method.__doc__ = cppol.prototype() + return method + +def make_cppclass(scope, cl_name, decl): # get a list of base classes for class creation bases = [get_pycppclass(base) for base in decl.get_base_names()] if not bases: bases = [CPPClass,] else: - # it's technically possible that the required class now has been built - # if one of the base classes uses it in e.g. a function interface + # it's possible that the required class now has been built if one of + # the base classes uses it in e.g. a function interface try: - return scope.__dict__[final_class_name] + return scope.__dict__[cl_name] except KeyError: pass @@ -147,39 +186,41 @@ d_meta = {} # prepare dictionary for python-side C++ class representation - def dispatch(self, name, signature): - cppol = decl.dispatch(name, signature) - return types.MethodType(make_method(name, cppol), self, type(self)) + def dispatch(self, m_name, signature): + cppol = decl.__dispatch__(m_name, signature) + return types.MethodType(make_method(m_name, cppol), self, type(self)) d_class = {"__cppdecl__" : decl, + "__new__" : make_new(decl), + "__module__" : make_module_name(scope), "__cppname__" : decl.__cppname__, From pypy.commits at gmail.com Thu Nov 9 15:20:42 2017 From: pypy.commits at gmail.com (stian) Date: Thu, 09 Nov 2017 12:20:42 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Fix translation Message-ID: <5a04b89a.099fdf0a.c3df7.3259@mx.google.com> Author: stian Branch: math-improvements Changeset: r92988:c961b6f6e3c6 Date: 2017-11-09 21:20 +0100 http://bitbucket.org/pypy/pypy/changeset/c961b6f6e3c6/ Log: Fix translation diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -167,6 +167,7 @@ def __ne__(self, other): return not (self == other) + @specialize.argtype(1) def digit(self, x): """Return the x'th digit, as an int.""" return self._digits[x] @@ -212,13 +213,15 @@ if intval < 0: sign = -1 ival = -r_uint(intval) + carry = ival >> SHIFT elif intval > 0: sign = 1 ival = r_uint(intval) + carry = 0 else: return NULLRBIGINT - carry = ival >> SHIFT + if carry: return rbigint([_store_digit(ival & MASK), _store_digit(carry)], sign, 2) @@ -851,17 +854,17 @@ size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: - rem = self.widedigit(size) + wrem = self.widedigit(size) while size > 0: size -= 1 - rem = ((rem << SHIFT) | self.digit(size)) % digit - + wrem = ((wrem << SHIFT) | self.digit(size)) % digit + rem = _store_digit(wrem) else: - rem = self.widedigit(0) % digit + rem = _store_digit(self.digit(0) % digit) if rem == 0: return NULLRBIGINT - mod = rbigint([_store_digit(rem)], -1 if self.sign < 0 else 1, 1) + mod = rbigint([rem], -1 if self.sign < 0 else 1, 1) else: div, mod = 
_divrem(self, other) if mod.sign * other.sign == -1: @@ -893,16 +896,17 @@ size = UDIGIT_TYPE(self.numdigits() - 1) if size > 0: - rem = self.widedigit(size) + wrem = self.widedigit(size) while size > 0: size -= 1 - rem = ((rem << SHIFT) | self.digit(size)) % digit + wrem = ((wrem << SHIFT) | self.digit(size)) % digit + rem = _store_digit(wrem) else: - rem = self.digit(0) % digit + rem = _store_digit(self.digit(0) % digit) if rem == 0: return NULLRBIGINT - mod = rbigint([_store_digit(rem)], -1 if self.sign < 0 else 1, 1) + mod = rbigint([rem], -1 if self.sign < 0 else 1, 1) else: raise ZeroDivisionError("long division or modulo by zero") From pypy.commits at gmail.com Thu Nov 9 17:22:17 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 09 Nov 2017 14:22:17 -0800 (PST) Subject: [pypy-commit] pypy default: remove more maemo code Message-ID: <5a04d519.cc8ddf0a.830c6.3c72@mx.google.com> Author: Matti Picus Branch: Changeset: r92989:7b112966cdd7 Date: 2017-11-10 00:21 +0200 http://bitbucket.org/pypy/pypy/changeset/7b112966cdd7/ Log: remove more maemo code diff --git a/rpython/translator/platform/test/test_posix.py b/rpython/translator/platform/test/test_posix.py --- a/rpython/translator/platform/test/test_posix.py +++ b/rpython/translator/platform/test/test_posix.py @@ -64,10 +64,3 @@ assert 'INCLUDEDIRS = %s/foo/baz/include' % include_prefix in Makefile assert 'LIBDIRS = %s/foo/baz/lib' % lib_prefix in Makefile -class TestMaemo(TestMakefile): - strict_on_stderr = False - - def setup_class(cls): - from rpython.translator.platform.maemo import check_scratchbox, Maemo - check_scratchbox() - cls.platform = Maemo() From pypy.commits at gmail.com Sat Nov 11 10:51:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 11 Nov 2017 07:51:43 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix translation Message-ID: <5a071c8f.57b9df0a.291a8.22f6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92990:92c6fb568fa1 Date: 2017-11-11 15:51 +0000 http://bitbucket.org/pypy/pypy/changeset/92c6fb568fa1/ Log: fix translation diff --git a/pypy/objspace/std/typeobject.py b/pypy/objspace/std/typeobject.py --- a/pypy/objspace/std/typeobject.py +++ b/pypy/objspace/std/typeobject.py @@ -977,6 +977,7 @@ w_type.setdictvalue(space, '__doc__', w_value) def type_get_txtsig(space, w_type): + w_type = _check(space, w_type) if w_type.text_signature is None: return space.w_None return space.newtext(w_type.text_signature) From pypy.commits at gmail.com Sat Nov 11 16:06:26 2017 From: pypy.commits at gmail.com (fijal) Date: Sat, 11 Nov 2017 13:06:26 -0800 (PST) Subject: [pypy-commit] pypy default: add a hint Message-ID: <5a076652.44841c0a.152ca.d548@mx.google.com> Author: fijal Branch: Changeset: r92991:e5bfccc9fd98 Date: 2017-11-11 16:05 -0500 http://bitbucket.org/pypy/pypy/changeset/e5bfccc9fd98/ Log: add a hint diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -385,6 +385,7 @@ @specialize.argtype(1) def _inplace_add(self, other): + resizelist_hint(self._data, len(self._data) + len(other)) for i in range(len(other)): self._data.append(other[i]) From pypy.commits at gmail.com Sat Nov 11 23:29:38 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 11 Nov 2017 20:29:38 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove some unneddecary use of widedigit in _x_mul Message-ID: <5a07ce32.c39cdf0a.b72ee.96a2@mx.google.com> Author: stian Branch: math-improvements 
Changeset: r92992:985fb3488ff0 Date: 2017-11-12 05:28 +0100 http://bitbucket.org/pypy/pypy/changeset/985fb3488ff0/ Log: Remove some unneddecary use of widedigit in _x_mul diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1753,12 +1753,12 @@ pz += 1 carry >>= SHIFT if carry: - carry += z.uwidedigit(pz) + carry += z.udigit(pz) z.setdigit(pz, carry) pz += 1 carry >>= SHIFT if carry: - z.setdigit(pz, z.uwidedigit(pz) + carry) + z.setdigit(pz, z.udigit(pz) + carry) assert (carry >> SHIFT) == 0 i += 1 z._normalize() @@ -1822,7 +1822,7 @@ pz += 1 carry >>= SHIFT if carry: - z.setdigit(pz, z.uwidedigit(pz) + carry) + z.setdigit(pz, z.udigit(pz) + carry) z._normalize() return z From pypy.commits at gmail.com Sun Nov 12 02:41:43 2017 From: pypy.commits at gmail.com (stian) Date: Sat, 11 Nov 2017 23:41:43 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Make inplace_divmod unsigned, this makes for a ~20% speed up in long / single digit Message-ID: <5a07fb37.131f1c0a.a8ee2.41fb@mx.google.com> Author: stian Branch: math-improvements Changeset: r92993:3c7a6c85f39c Date: 2017-11-12 08:40 +0100 http://bitbucket.org/pypy/pypy/changeset/3c7a6c85f39c/ Log: Make inplace_divmod unsigned, this makes for a ~20% speed up in long / single digit diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -718,7 +718,7 @@ elif a._digits[0] == ONEDIGIT: return rbigint(b._digits[:bsize], a.sign * b.sign, bsize) elif bsize == 1: - res = b.uwidedigit(0) * a.uwidedigit(0) + res = b.uwidedigit(0) * a.udigit(0) carry = res >> SHIFT if carry: return rbigint([_store_digit(res & MASK), _store_digit(carry)], a.sign * b.sign, 2) @@ -1949,13 +1949,13 @@ Divide bigint pin by non-zero digit n, storing quotient in pout, and returning the remainder. It's OK for pin == pout on entry. """ - rem = _widen_digit(0) + rem = _unsigned_widen_digit(0) assert n > 0 and n <= MASK if not size: size = pin.numdigits() size -= 1 while size >= 0: - rem = (rem << SHIFT) | pin.digit(size) + rem = (rem << SHIFT) | pin.udigit(size) hi = rem // n pout.setdigit(size, hi) rem -= hi * n From pypy.commits at gmail.com Sun Nov 12 04:36:38 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 01:36:38 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Provide two assets to make better code in long multidigit division Message-ID: <5a081626.26afdf0a.bdbf5.9cbe@mx.google.com> Author: stian Branch: math-improvements Changeset: r92994:22373c826010 Date: 2017-11-12 10:29 +0100 http://bitbucket.org/pypy/pypy/changeset/22373c826010/ Log: Provide two assets to make better code in long multidigit division diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2130,8 +2130,11 @@ vtop = 0 else: vtop = v.widedigit(j) << SHIFT - #assert vtop <= wm1 + vv = vtop | v.digit(abs(j-1)) + # These two hints to make division just as fast as doing it unsigned. + assert vv >= 0 + assert wm1 >= 1 q = vv / wm1 r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. vj2 = v.digit(abs(j-2)) From pypy.commits at gmail.com Sun Nov 12 04:36:40 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 01:36:40 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Tweak comment about why we don't do it unsigned. 
Message-ID: <5a081628.480f1c0a.a8b02.89ac@mx.google.com> Author: stian Branch: math-improvements Changeset: r92995:f09288ca6bf9 Date: 2017-11-12 10:36 +0100 http://bitbucket.org/pypy/pypy/changeset/f09288ca6bf9/ Log: Tweak comment about why we don't do it unsigned. diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2117,7 +2117,7 @@ wm1 = w.widedigit(abs(size_w-1)) wm2 = w.widedigit(abs(size_w-2)) - + j = size_v - 1 k -= 1 while k >= 0: @@ -2132,7 +2132,7 @@ vtop = v.widedigit(j) << SHIFT vv = vtop | v.digit(abs(j-1)) - # These two hints to make division just as fast as doing it unsigned. + # Hints to make division just as fast as doing it unsigned. But avoids casting to get correct results. assert vv >= 0 assert wm1 >= 1 q = vv / wm1 From pypy.commits at gmail.com Sun Nov 12 09:31:49 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 12 Nov 2017 06:31:49 -0800 (PST) Subject: [pypy-commit] pypy default: Issue #2699: test and fixes. Message-ID: <5a085b55.178fdf0a.93bfd.bea7@mx.google.com> Author: Armin Rigo Branch: Changeset: r92996:bc4acc4caa28 Date: 2017-11-12 15:30 +0100 http://bitbucket.org/pypy/pypy/changeset/bc4acc4caa28/ Log: Issue #2699: test and fixes. Note that this includes a fix to the stdlib warnings.py, otherwise non-ascii warning messages are usually swallowed. That's a bug in CPython, I think. diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -201,9 +201,20 @@ w_stderr = space.sys.get("stderr") # Print "filename:lineno: category: text\n" - message = "%s:%d: %s: %s\n" % (space.text_w(w_filename), lineno, - space.text_w(w_name), space.text_w(w_text)) - space.call_method(w_stderr, "write", space.newtext(message)) + try: + message = "%s:%d: %s: %s\n" % (space.text_w(w_filename), lineno, + space.text_w(w_name), + space.text_w(w_text)) + except OperationError as e: + if e.async(space): + raise + message = u"%s:%d: %s: %s\n" % (space.unicode_w(w_filename), lineno, + space.unicode_w(w_name), + space.unicode_w(w_text)) + w_message = space.newunicode(message) + else: + w_message = space.newtext(message) + space.call_method(w_stderr, "write", w_message) # Print " source_line\n" if not w_sourceline: @@ -248,7 +259,7 @@ if space.isinstance_w(w_message, space.w_Warning): w_text = space.str(w_message) w_category = space.type(w_message) - elif (not space.isinstance_w(w_message, space.w_unicode) or + elif (not space.isinstance_w(w_message, space.w_unicode) and not space.isinstance_w(w_message, space.w_bytes)): w_text = space.str(w_message) w_message = space.call_function(w_category, w_message) diff --git a/pypy/module/_warnings/test/test_warnings.py b/pypy/module/_warnings/test/test_warnings.py --- a/pypy/module/_warnings/test/test_warnings.py +++ b/pypy/module/_warnings/test/test_warnings.py @@ -65,3 +65,23 @@ _warnings.warn('test', 
UserWarning) globals()['__file__'] = None _warnings.warn('test', UserWarning) + + def test_warn_unicode(self): + import _warnings, sys + old = sys.stderr + try: + class Grab: + def write(self, u): + self.data.append(u) + sys.stderr = Grab() + sys.stderr.data = data = [] + _warnings.warn_explicit("9238exbexn8", Warning, + "", 1421, module_globals=globals()) + assert isinstance(''.join(data), str) + _warnings.warn_explicit(u"\u1234\u5678", UserWarning, + "", 831, module_globals=globals()) + assert isinstance(''.join(data), unicode) + assert ''.join(data).endswith( + u':831: UserWarning: \u1234\u5678\n') + finally: + sys.stderr = old From pypy.commits at gmail.com Sun Nov 12 15:14:07 2017 From: pypy.commits at gmail.com (stian) Date: Sun, 12 Nov 2017 12:14:07 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Fix ulllong division OP in rtyper Message-ID: <5a08ab8f.1cbf1c0a.e0517.e336@mx.google.com> Author: stian Branch: math-improvements Changeset: r92997:8b41193b43b2 Date: 2017-11-12 21:10 +0100 http://bitbucket.org/pypy/pypy/changeset/8b41193b43b2/ Log: Fix ulllong division OP in rtyper diff --git a/rpython/rtyper/rint.py b/rpython/rtyper/rint.py --- a/rpython/rtyper/rint.py +++ b/rpython/rtyper/rint.py @@ -476,7 +476,7 @@ @jit.dont_look_inside def ll_ulllong_py_div(x, y): - return llop.ullong_floordiv(UnsignedLongLongLong, x, y) + return llop.ulllong_floordiv(UnsignedLongLongLong, x, y) def ll_ulllong_py_div_zer(x, y): if y == 0: From pypy.commits at gmail.com Sun Nov 12 17:16:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:16:24 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement __text_signature__ on PyCFunctions Message-ID: <5a08c838.21b9df0a.93d86.5c2e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92998:a626dd21b1fa Date: 2017-11-12 20:11 +0000 http://bitbucket.org/pypy/pypy/changeset/a626dd21b1fa/ Log: Implement __text_signature__ on PyCFunctions diff --git a/pypy/module/cpyext/methodobject.py b/pypy/module/cpyext/methodobject.py --- a/pypy/module/cpyext/methodobject.py +++ b/pypy/module/cpyext/methodobject.py @@ -43,6 +43,39 @@ from pypy.module.cpyext.object import _dealloc _dealloc(space, py_obj) +def undotted_name(name): + """Return the last component of a dotted name""" + dotpos = name.rfind('.') + if dotpos < 0: + return name + else: + return name[dotpos + 1:] + +SIGNATURE_MARKER = ')\n--\n\n' + +def extract_doc(raw_doc, name): + doc = raw_doc + name = undotted_name(name) + if raw_doc.startswith(name + '('): + end_sig = raw_doc.find(SIGNATURE_MARKER) + if end_sig > 0: + doc = raw_doc[end_sig + len(SIGNATURE_MARKER):] + if not doc: + return None + return doc + +def extract_txtsig(raw_doc, name): + name = undotted_name(name) + if raw_doc.startswith(name + '('): + end_sig = raw_doc.find(SIGNATURE_MARKER) + if end_sig > 0: + # Notes: + # * Parentheses are included + # * SIGNATURE_MARKER cannot appear inside name, + # so end_sig > len(name) + return raw_doc[len(name): end_sig + 1] + return None + class W_PyCFunctionObject(W_Root): # TODO create a slightly different class depending on the c_ml_flags def __init__(self, space, ml, w_self, w_module=None): @@ -84,11 +117,22 @@ raise oefmt(space.w_RuntimeError, "unknown calling convention") def get_doc(self, space): - doc = self.ml.c_ml_doc - if doc: - return space.newtext(rffi.charp2str(rffi.cast(rffi.CCHARP,doc))) - else: - return space.w_None + c_doc = self.ml.c_ml_doc + if c_doc: + rawdoc = rffi.charp2str(rffi.cast(rffi.CCHARP, c_doc)) + doc = extract_doc(rawdoc, self.name) + if doc 
is not None: + return space.newtext(doc) + return space.w_None + + def get_txtsig(self, space): + c_doc = self.ml.c_ml_doc + if c_doc: + rawdoc = rffi.charp2str(rffi.cast(rffi.CCHARP, c_doc)) + txtsig = extract_txtsig(rawdoc, self.name) + if txtsig is not None: + return space.newtext(txtsig) + return space.w_None class W_PyCFunctionObjectNoArgs(W_PyCFunctionObject): def call(self, space, w_self, w_args, w_kw): @@ -289,6 +333,7 @@ 'builtin_function_or_method', __call__ = interp2app(cfunction_descr_call), __doc__ = GetSetProperty(W_PyCFunctionObject.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObject.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObject), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObject, wrapfn="newtext_or_none"), @@ -299,6 +344,7 @@ 'builtin_function_or_method', W_PyCFunctionObject.typedef, __call__ = interp2app(cfunction_descr_call_noargs), __doc__ = GetSetProperty(W_PyCFunctionObjectNoArgs.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObjectNoArgs.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObjectNoArgs), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObjectNoArgs, wrapfn="newtext_or_none"), @@ -309,6 +355,7 @@ 'builtin_function_or_method', W_PyCFunctionObject.typedef, __call__ = interp2app(cfunction_descr_call_single_object), __doc__ = GetSetProperty(W_PyCFunctionObjectSingleObject.get_doc), + __text_signature__ = GetSetProperty(W_PyCFunctionObjectSingleObject.get_txtsig), __module__ = interp_attrproperty_w('w_module', cls=W_PyCFunctionObjectSingleObject), __name__ = interp_attrproperty('name', cls=W_PyCFunctionObjectSingleObject, wrapfn="newtext_or_none"), diff --git a/pypy/module/cpyext/test/docstrings.c b/pypy/module/cpyext/test/docstrings.c new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/docstrings.c @@ -0,0 +1,149 @@ +#include "Python.h" + +static PyObject * +test_with_docstring(PyObject *self) +{ + Py_RETURN_NONE; +} + +PyDoc_STRVAR(empty_doc, +"" +); + +PyDoc_STRVAR(no_sig, +"This docstring has no signature." +); + +PyDoc_STRVAR(invalid_sig, +"invalid_sig($module, /, boo)\n" +"\n" +"This docstring has an invalid signature." +); + +PyDoc_STRVAR(invalid_sig2, +"invalid_sig2($module, /, boo)\n" +"\n" +"--\n" +"\n" +"This docstring also has an invalid signature." +); + +PyDoc_STRVAR(with_sig, +"with_sig($module, /, sig)\n" +"--\n" +"\n" +"This docstring has a valid signature." +); + +PyDoc_STRVAR(with_sig_but_no_doc, +"with_sig_but_no_doc($module, /, sig)\n" +"--\n" +"\n" +); + +PyDoc_STRVAR(with_signature_and_extra_newlines, +"with_signature_and_extra_newlines($module, /, parameter)\n" +"--\n" +"\n" +"\n" +"This docstring has a valid signature and some extra newlines." 
+); + + +static PyMethodDef methods[] = { + {"no_doc", + (PyCFunction)test_with_docstring, METH_NOARGS}, + {"empty_doc", + (PyCFunction)test_with_docstring, METH_NOARGS, + empty_doc}, + {"no_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + no_sig}, + {"invalid_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + invalid_sig}, + {"invalid_sig2", + (PyCFunction)test_with_docstring, METH_NOARGS, + invalid_sig2}, + {"with_sig", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_sig}, + {"with_sig_but_no_doc", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_sig_but_no_doc}, + {"with_signature_and_extra_newlines", + (PyCFunction)test_with_docstring, METH_NOARGS, + with_signature_and_extra_newlines}, + {NULL, NULL} /* sentinel */ +}; + + +static PyType_Slot HeapType_slots[] = { + {Py_tp_doc, "HeapType()\n--\n\nA type with a signature"}, + {0, 0}, +}; + +static PyType_Spec HeapType_spec = { + "docstrings.HeapType", + sizeof(PyObject), + 0, + Py_TPFLAGS_DEFAULT, + HeapType_slots +}; + +static PyTypeObject SomeType = { + PyVarObject_HEAD_INIT(NULL, 0) + "docstrings.SomeType", /* tp_name */ + sizeof(PyObject), /* tp_basicsize */ + 0, /* tp_itemsize */ + 0, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + "SomeType()\n--\n\nA type with a signature", /* tp_doc */ +}; + + +static struct PyModuleDef def = { + PyModuleDef_HEAD_INIT, + "docstrings", + NULL, + -1, + methods, + NULL, + NULL, + NULL, + NULL +}; + + +PyMODINIT_FUNC +PyInit_docstrings(void) +{ + PyObject *m, *tmp; + m = PyModule_Create(&def); + if (m == NULL) + return NULL; + tmp = PyType_FromSpec(&HeapType_spec); + if (tmp == NULL) + return NULL; + if (PyModule_AddObject(m, "HeapType", tmp) != 0) + return NULL; + if (PyType_Ready(&SomeType) < 0) + return NULL; + if (PyModule_AddObject(m, "SomeType", (PyObject*)&SomeType) != 0) + return NULL; + return m; +} diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -100,3 +100,23 @@ assert mod.check(A) == 0 assert mod.check(A.meth) == 0 assert mod.check(A.stat) == 0 + + def test_text_signature(self): + mod = self.import_module('docstrings') + assert mod.no_doc.__doc__ is None + assert mod.no_doc.__text_signature__ is None + assert mod.empty_doc.__doc__ is None + assert mod.empty_doc.__text_signature__ is None + assert mod.no_sig.__doc__ + assert mod.no_sig.__text_signature__ is None + assert mod.invalid_sig.__doc__ + assert mod.invalid_sig.__text_signature__ is None + assert mod.invalid_sig2.__doc__ + assert mod.invalid_sig2.__text_signature__ is None + assert mod.with_sig.__doc__ + assert mod.with_sig.__text_signature__ == '($module, /, sig)' + assert mod.with_sig_but_no_doc.__doc__ is None + assert mod.with_sig_but_no_doc.__text_signature__ == '($module, /, sig)' + assert mod.with_signature_and_extra_newlines.__doc__ + assert (mod.with_signature_and_extra_newlines.__text_signature__ == + '($module, /, parameter)') From pypy.commits at gmail.com Sun Nov 12 17:16:26 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:16:26 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Implement 
__text_signature__ on C-defined types Message-ID: <5a08c83a.51a9df0a.44f05.09e5@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r92999:a6bc26a09fc3 Date: 2017-11-12 22:15 +0000 http://bitbucket.org/pypy/pypy/changeset/a6bc26a09fc3/ Log: Implement __text_signature__ on C-defined types diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -453,6 +453,16 @@ p = property(lambda: "never used", pset, pdel) assert module.tp_descr_set(p) is True + def test_text_signature(self): + module = self.import_module(name='docstrings') + assert module.SomeType.__text_signature__ == '()' + assert module.SomeType.__doc__ == 'A type with a signature' + if '__pypy__' in sys.modules: + assert module.HeapType.__text_signature__ == '()' + else: # XXX: bug in CPython? + assert module.HeapType.__text_signature__ is None + assert module.HeapType.__doc__ == 'A type with a signature' + class TestTypes(BaseApiTest): def test_type_attributes(self, space, api): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -23,7 +23,7 @@ from pypy.module.cpyext.cparser import CTypeSpace from pypy.module.cpyext.methodobject import (W_PyCClassMethodObject, W_PyCWrapperObject, PyCFunction_NewEx, PyCFunction, PyMethodDef, - W_PyCMethodObject, W_PyCFunctionObject) + W_PyCMethodObject, W_PyCFunctionObject, extract_doc, extract_txtsig) from pypy.module.cpyext.modsupport import convert_method_defs from pypy.module.cpyext.pyobject import ( PyObject, make_ref, from_ref, get_typedescr, make_typedescr, @@ -331,7 +331,7 @@ if not getattr(struct, slot_names[1]): setattr(struct, slot_names[1], slot_func_helper) -def add_operators(space, dict_w, pto): +def add_operators(space, dict_w, pto, name): from pypy.module.cpyext.object import PyObject_HashNotImplemented hash_not_impl = PyObject_HashNotImplemented.api_func.get_llhelper(space) for method_name, slot_names, wrapper_func, wrapper_func_kwds, doc in slotdefs_for_wrappers: @@ -361,8 +361,8 @@ wrapper_func_kwds, doc, func_voidp, offset=offset) dict_w[method_name] = w_obj if pto.c_tp_doc: - dict_w['__doc__'] = space.newtext( - rffi.charp2str(cts.cast('char*', pto.c_tp_doc))) + raw_doc = rffi.charp2str(cts.cast('char*', pto.c_tp_doc)) + dict_w['__doc__'] = space.newtext(extract_doc(raw_doc, name)) if pto.c_tp_new: add_tp_new_wrapper(space, dict_w, pto) @@ -504,12 +504,12 @@ bases_w = space.fixedview(from_ref(space, pto.c_tp_bases)) dict_w = {} - add_operators(space, dict_w, pto) + name = rffi.charp2str(cts.cast('char*', pto.c_tp_name)) + add_operators(space, dict_w, pto, name) convert_method_defs(space, dict_w, pto.c_tp_methods, self) convert_getset_defs(space, dict_w, pto.c_tp_getset, self) convert_member_defs(space, dict_w, pto.c_tp_members, self) - name = rffi.charp2str(cts.cast('char*', pto.c_tp_name)) flag_heaptype = pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE if flag_heaptype: minsize = rffi.sizeof(PyHeapTypeObject.TO) @@ -527,8 +527,9 @@ elif pto.c_tp_as_mapping and pto.c_tp_as_mapping.c_mp_subscript: self.flag_map_or_seq = 'M' if pto.c_tp_doc: - self.w_doc = space.newtext( - rffi.charp2str(cts.cast('char*', pto.c_tp_doc))) + rawdoc = rffi.charp2str(cts.cast('char*', pto.c_tp_doc)) + self.w_doc = space.newtext_or_none(extract_doc(rawdoc, name)) + self.text_signature = extract_txtsig(rawdoc, name) @bootstrap_function def 
init_typeobject(space): From pypy.commits at gmail.com Sun Nov 12 17:18:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Sun, 12 Nov 2017 14:18:59 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Enable __text_signature__ tests in CPython test suite Message-ID: <5a08c8d3.84b5df0a.b013e.a97a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93000:49df4f50208f Date: 2017-11-12 22:18 +0000 http://bitbucket.org/pypy/pypy/changeset/49df4f50208f/ Log: Enable __text_signature__ tests in CPython test suite diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -769,7 +769,6 @@ kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}, formatted='(path, *, dir_fd=None, follow_symlinks=True)') - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func(self): @@ -778,7 +777,6 @@ spec = inspect.getfullargspec(builtin) self.assertEqual(spec.defaults[0], 'avocado') - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullagrspec_builtin_func_no_signature(self): @@ -1959,7 +1957,6 @@ ('kwargs', ..., int, "var_keyword")), ...)) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtins(self): @@ -2033,7 +2030,6 @@ # Regression test for issue #20586 test_callable(_testcapi.docstring_with_signature_but_no_doc) - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_decorated_builtins(self): @@ -2056,7 +2052,6 @@ follow_wrapped=False), inspect.signature(wrapper_like)) - @cpython_only def test_signature_on_builtins_no_signature(self): import _testcapi with self.assertRaisesRegex(ValueError, @@ -3417,7 +3412,6 @@ # This test case provides a home for checking that particular APIs # have signatures available for introspection - @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_builtins_have_signatures(self): From pypy.commits at gmail.com Mon Nov 13 06:14:33 2017 From: pypy.commits at gmail.com (cfbolz) Date: Mon, 13 Nov 2017 03:14:33 -0800 (PST) Subject: [pypy-commit] pypy default: fix issue #2701 Message-ID: <5a097e99.54d91c0a.b9257.b4f8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93001:b95f1240ad90 Date: 2017-11-13 12:13 +0100 http://bitbucket.org/pypy/pypy/changeset/b95f1240ad90/ Log: fix issue #2701 allow the sequences future-import, docstring, future-import for CPython bug-compatibility diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED From pypy.commits at gmail.com Mon Nov 13 07:53:58 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 13 Nov 2017 04:53:58 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Remove invert logic from rqshift (it is only used with positive numbers) Message-ID: <5a0995e6.45aa1c0a.8fd97.8e7f@mx.google.com> Author: stian Branch: math-improvements Changeset: r93002:3f4aca709e49 Date: 2017-11-13 13:53 +0100 http://bitbucket.org/pypy/pypy/changeset/3f4aca709e49/ Log: Remove invert logic from rqshift (it is only used with positive numbers) diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -1314,38 +1314,22 @@ wordshift = int_other / SHIFT loshift = int_other % SHIFT newsize = self.numdigits() - wordshift - - invert = False - if self.sign == -1: - first = self.digit(0) - if first == 0: - a = self.invert().rqshift(int_other) - return a.invert() - invert = True if newsize <= 0: - if invert: - return ONENEGATIVERBIGINT - else: - return NULLRBIGINT + return NULLRBIGINT - hishift = SHIFT - loshift z = rbigint([NULLDIGIT] * newsize, self.sign, newsize) i = 0 while i < newsize: digit = self.udigit(wordshift) - if invert and i == 0 and wordshift == 0: - digit -= 1 newdigit = (digit >> loshift) if i+1 < newsize: newdigit |= (self.udigit(wordshift+1) << hishift) z.setdigit(i, newdigit) i += 1 - wordshift += 1 - if invert: - z.setdigit(0, z.digit(0)+1) + wordshift += 1 z._normalize() return z rshift._always_inline_ = 'try' # It's so fast that it's always benefitial. diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -609,21 +609,18 @@ res1 = f1.lqshift(z).tolong() res2 = f1.rqshift(z).tolong() res3 = nf1.lqshift(z).tolong() - res4 = nf1.rqshift(z).tolong() + assert res1 == num << z assert res2 == num >> z assert res3 == -num << z - assert res4 == -num >> z - # Large digit, also invertion test. 
+ + # Large digit for x in range((1 << SHIFT) - 10, (1 << SHIFT) + 10): f1 = rbigint.fromlong(x) - nf1 = rbigint.fromlong(-x) assert f1.rqshift(SHIFT).tolong() == x >> SHIFT - assert nf1.rqshift(SHIFT).tolong() == -x >> SHIFT assert f1.rqshift(SHIFT+1).tolong() == x >> (SHIFT+1) - assert nf1.rqshift(SHIFT+1).tolong() == -x >> (SHIFT+1) def test_from_list_n_bits(self): for x in ([3L ** 30L, 5L ** 20L, 7 ** 300] + From pypy.commits at gmail.com Mon Nov 13 11:55:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 08:55:51 -0800 (PST) Subject: [pypy-commit] pypy py3.5: backout c7e665a4d094: this hack isn't needed any more Message-ID: <5a09ce97.b796df0a.f26c0.2e6b@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93003:e73ed06e7955 Date: 2017-11-13 16:53 +0000 http://bitbucket.org/pypy/pypy/changeset/e73ed06e7955/ Log: backout c7e665a4d094: this hack isn't needed any more diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -2078,8 +2078,6 @@ s = getattr(func, "__text_signature__", None) if not s: - if func is object: # XXX PyPy hack until we support __text_signature__ - return '()' # in the same cases as CPython raise ValueError("no signature found for builtin {!r}".format(func)) return _signature_fromstr(cls, func, s, skip_bound_arg) From pypy.commits at gmail.com Mon Nov 13 13:15:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 10:15:03 -0800 (PST) Subject: [pypy-commit] pypy default: Test an obscure difference between C-defined and Python-defined functions Message-ID: <5a09e127.0ec6df0a.73939.4fa3@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93004:3b8c612bb506 Date: 2017-11-13 18:14 +0000 http://bitbucket.org/pypy/pypy/changeset/3b8c612bb506/ Log: Test an obscure difference between C-defined and Python-defined functions diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -93,6 +93,22 @@ assert mod.isSameFunction(mod.getarg_O) raises(SystemError, mod.isSameFunction, 1) + def test_function_as_method(self): + # Unlike user functions, builtins don't become methods + mod = self.import_extension('foo', [ + ('f', 'METH_NOARGS', + ''' + return PyLong_FromLong(42); + '''), + ]) + class A(object): pass + A.f = mod.f + A.g = lambda: 42 + assert A.f() == 42 + raises(TypeError, A.g) + assert A().f() == 42 + raises(TypeError, A().g) + def test_check(self): mod = self.import_extension('foo', [ ('check', 'METH_O', @@ -116,4 +132,3 @@ assert mod.check(A) == 0 assert mod.check(A.meth) == 0 assert mod.check(A.stat) == 0 - From pypy.commits at gmail.com Mon Nov 13 15:31:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 12:31:00 -0800 (PST) Subject: [pypy-commit] pypy default: Implement cpyext.is_cpyext_function() Message-ID: <5a0a0104.cc1d1c0a.1ad51.7381@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93005:a305590465d6 Date: 2017-11-13 20:30 +0000 http://bitbucket.org/pypy/pypy/changeset/a305590465d6/ Log: Implement cpyext.is_cpyext_function() inspect.isbuiltin() now returns True for functions implemented in C, like on CPython. 
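A rough usage sketch of the new predicate (not part of the changeset below); "mymod" stands in for any compiled cpyext extension module that exposes a C-level function f:

    import inspect
    try:
        # added by this changeset; only exists on PyPy
        from cpyext import is_cpyext_function
    except ImportError:
        # same fallback as the lib-python/2.7/inspect.py patch below
        is_cpyext_function = lambda obj: False

    import mymod                      # hypothetical cpyext extension module

    def g():
        return 42

    # C-implemented function: detected either by the new helper (PyPy) or
    # by the unpatched isbuiltin() (CPython)
    assert is_cpyext_function(mymod.f) or inspect.isbuiltin(mymod.f)
    # an ordinary Python function is neither
    assert not is_cpyext_function(g) and not inspect.isbuiltin(g)

On a PyPy with this change applied, inspect.isbuiltin(mymod.f) itself also returns True, since the patched inspect.py ORs the new predicate into its isinstance check.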
diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py --- a/pypy/module/cpyext/__init__.py +++ b/pypy/module/cpyext/__init__.py @@ -5,6 +5,7 @@ class Module(MixedModule): interpleveldefs = { 'load_module': 'api.load_extension_module', + 'is_cpyext_function': 'interp_cpyext.is_cpyext_function', } appleveldefs = { diff --git a/pypy/module/cpyext/interp_cpyext.py b/pypy/module/cpyext/interp_cpyext.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/interp_cpyext.py @@ -0,0 +1,4 @@ +from .methodobject import W_PyCFunctionObject + +def is_cpyext_function(space, w_arg): + return space.newbool(isinstance(w_arg, W_PyCFunctionObject)) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -381,6 +381,11 @@ def test_export_function(self): import sys + if '__pypy__' in sys.modules: + from cpyext import is_cpyext_function + else: + import inspect + is_cpyext_function = inspect.isbuiltin init = """ if (Py_IsInitialized()) Py_InitModule("foo", methods); @@ -399,6 +404,7 @@ assert 'foo' in sys.modules assert 'return_pi' in dir(module) assert module.return_pi is not None + assert is_cpyext_function(module.return_pi) assert module.return_pi() == 3.14 assert module.return_pi.__module__ == 'foo' @@ -777,14 +783,14 @@ # Set an exception and return NULL raises(TypeError, module.set, None) - # clear any exception and return a value + # clear any exception and return a value assert module.clear(1) == 1 # Set an exception, but return non-NULL expected = 'An exception was set, but function returned a value' exc = raises(SystemError, module.set, 1) assert exc.value[0] == expected - + # Clear the exception and return a value, all is OK assert module.clear(1) == 1 diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -104,8 +104,10 @@ class A(object): pass A.f = mod.f A.g = lambda: 42 + # Unbound method assert A.f() == 42 raises(TypeError, A.g) + # Bound method assert A().f() == 42 raises(TypeError, A().g) From pypy.commits at gmail.com Mon Nov 13 16:07:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:07:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a0a09a8.17361c0a.4db14.2372@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93006:19326fb34a67 Date: 2017-11-13 21:07 +0000 http://bitbucket.org/pypy/pypy/changeset/19326fb34a67/ Log: hg merge default diff --git 
a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py --- a/extra_tests/test_bytes.py +++ b/extra_tests/test_bytes.py @@ -1,25 +1,27 @@ from hypothesis import strategies as st from hypothesis import given, example - at given(st.binary(), st.binary(), st.binary()) +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) def test_find(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.find(u) <= len(prefix) assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_index(u, prefix, suffix): s = prefix + u + suffix assert 0 <= s.index(u) <= len(prefix) assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rfind(u, prefix, suffix): s = prefix + u + suffix assert s.rfind(u) >= len(prefix) assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) - at given(st.binary(), st.binary(), st.binary()) + at given(st_bytestring, st_bytestring, st_bytestring) def test_rindex(u, prefix, suffix): s = prefix + u + suffix assert s.rindex(u) >= len(prefix) @@ -34,20 +36,20 @@ start = max(start + len(u), 0) return start, end - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_startswith_basic(u, v): assert u.startswith(v) is (u[:len(v)] == v) @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_startswith_start(u, v, start): expected = u[start:].startswith(v) if v else (start <= len(u)) assert u.startswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_startswith_3(u, v, start, end): if v: expected = u[start:end].startswith(v) @@ -56,7 +58,7 @@ expected = start0 <= len(u) and start0 <= end0 assert u.startswith(v, start, end) is expected - at given(st.binary(), st.binary()) + at given(st_bytestring, st_bytestring) def test_endswith_basic(u, v): if len(v) > len(u): assert u.endswith(v) is False @@ -65,14 +67,14 @@ @example(b'x', b'', 1) @example(b'x', b'', 2) - at given(st.binary(), st.binary(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers()) def test_endswith_2(u, v, start): expected = u[start:].endswith(v) if v else (start <= len(u)) assert u.endswith(v, start) is expected @example(b'x', b'', 1, 0) @example(b'xx', b'', -1, 0) - at given(st.binary(), st.binary(), st.integers(), st.integers()) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) def test_endswith_3(u, v, start, end): if v: expected = u[start:end].endswith(v) diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. 
CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -87,13 +87,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -193,3 +193,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == 0 + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == 0 diff --git a/pypy/module/_warnings/interp_warnings.py b/pypy/module/_warnings/interp_warnings.py --- a/pypy/module/_warnings/interp_warnings.py +++ b/pypy/module/_warnings/interp_warnings.py @@ -292,7 +292,7 @@ if space.isinstance_w(w_message, space.w_Warning): w_text = space.str(w_message) w_category = space.type(w_message) - elif (not space.isinstance_w(w_message, space.w_unicode) or + elif (not space.isinstance_w(w_message, space.w_unicode) and not space.isinstance_w(w_message, space.w_bytes)): w_text = space.str(w_message) w_message = space.call_function(w_category, w_message) diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py --- a/pypy/module/cpyext/__init__.py +++ b/pypy/module/cpyext/__init__.py @@ -4,6 +4,7 @@ class Module(MixedModule): interpleveldefs = { + 'is_cpyext_function': 'interp_cpyext.is_cpyext_function', } appleveldefs = { @@ -41,7 +42,6 @@ import pypy.module.cpyext.pyerrors import pypy.module.cpyext.typeobject import pypy.module.cpyext.object -import pypy.module.cpyext.buffer import pypy.module.cpyext.bytesobject import 
pypy.module.cpyext.bytearrayobject import pypy.module.cpyext.tupleobject @@ -50,6 +50,7 @@ import pypy.module.cpyext.longobject import pypy.module.cpyext.listobject import pypy.module.cpyext.sequence +import pypy.module.cpyext.buffer import pypy.module.cpyext.eval import pypy.module.cpyext.import_ import pypy.module.cpyext.mapping diff --git a/pypy/module/cpyext/interp_cpyext.py b/pypy/module/cpyext/interp_cpyext.py new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/interp_cpyext.py @@ -0,0 +1,4 @@ +from .methodobject import W_PyCFunctionObject + +def is_cpyext_function(space, w_arg): + return space.newbool(isinstance(w_arg, W_PyCFunctionObject)) diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py --- a/pypy/module/cpyext/test/test_cpyext.py +++ b/pypy/module/cpyext/test/test_cpyext.py @@ -375,6 +375,11 @@ def test_export_function(self): import sys + if '__pypy__' in sys.modules: + from cpyext import is_cpyext_function + else: + import inspect + is_cpyext_function = inspect.isbuiltin body = """ PyObject* foo_pi(PyObject* self, PyObject *args) { @@ -396,6 +401,7 @@ assert 'foo' in sys.modules assert 'return_pi' in dir(module) assert module.return_pi is not None + assert is_cpyext_function(module.return_pi) assert module.return_pi() == 3.14 assert module.return_pi.__module__ == 'foo' diff --git a/pypy/module/cpyext/test/test_methodobject.py b/pypy/module/cpyext/test/test_methodobject.py --- a/pypy/module/cpyext/test/test_methodobject.py +++ b/pypy/module/cpyext/test/test_methodobject.py @@ -77,6 +77,23 @@ assert mod.isSameFunction(mod.getarg_O) raises(SystemError, mod.isSameFunction, 1) + def test_function_as_method(self): + # Unlike user functions, builtins don't become methods + mod = self.import_extension('foo', [ + ('f', 'METH_NOARGS', + ''' + return PyLong_FromLong(42); + '''), + ]) + class A(object): pass + A.f = mod.f + A.g = lambda: 42 + # Unbound method + assert A.f() == A.g() == 42 + # Bound method + assert A().f() == 42 + raises(TypeError, A().g) + def test_check(self): mod = self.import_extension('foo', [ ('check', 'METH_O', diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -362,6 +362,7 @@ @specialize.argtype(1) def _inplace_add(self, other): + resizelist_hint(self._data, len(self._data) + len(other)) for i in range(len(other)): self._data.append(other[i]) diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ cffi>=1.4.0 +vmprof>=0.4.10 # required to parse log files in rvmprof tests # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,10 +15,34 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations +from collections import deque log = AnsiLogger("annrpython") +class ShuffleDict(object): + def __init__(self): + self._d = {} + self.keys = deque() + + def __setitem__(self, k, v): + if k in self._d: + self._d[k] = v + else: + self._d[k] = v + self.keys.append(k) + + def __getitem__(self, k): + return self._d[k] + + def popitem(self): + key = self.keys.popleft() + item = self._d.pop(key) + return (key, item) + + def __nonzero__(self): + return 
bool(self._d) + class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -33,7 +57,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -39,9 +39,7 @@ CACHE_DIR = os.path.realpath(os.path.join(MAINDIR, '_cache')) PLATFORMS = [ - 'maemo', 'host', - 'distutils', 'arm', ] diff --git a/rpython/rlib/rvmprof/src/shared/machine.c b/rpython/rlib/rvmprof/src/shared/machine.c --- a/rpython/rlib/rvmprof/src/shared/machine.c +++ b/rpython/rlib/rvmprof/src/shared/machine.c @@ -28,7 +28,7 @@ #elif __linux__ return "linux"; #elif __FreeBSD__ - return "freebsd" + return "freebsd"; #else #error "Unknown compiler" #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -29,6 +29,7 @@ static int (*unw_is_signal_frame)(unw_cursor_t *) = NULL; static int (*unw_getcontext)(unw_context_t *) = NULL; #else +#define UNW_LOCAL_ONLY #include #endif diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -32,12 +32,21 @@ static size_t threads_size = 0; static size_t thread_count = 0; static size_t threads_size_step = 8; -#endif int vmprof_get_itimer_type(void) { return itimer_type; } +int vmprof_get_signal_type(void) { + return signal_type; +} +#endif + +#ifdef VMPROF_WINDOWS +#include "vmprof_win.h" +#endif + + int vmprof_is_enabled(void) { return is_enabled; } @@ -62,10 +71,6 @@ profile_interval_usec = value; } -int vmprof_get_signal_type(void) { - return signal_type; -} - char *vmprof_init(int fd, double interval, int memory, int proflines, const char *interp_name, int native, int real_time) { diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -15,7 +15,9 @@ #include #endif +#ifdef VMPROF_UNIX #include "vmprof_getpc.h" +#endif #ifdef VMPROF_LINUX #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_memory.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_memory.c @@ -8,7 +8,7 @@ #include static mach_port_t mach_task; -#else +#elif defined(VMPROF_UNIX) #include #include #include diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_unix.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_unix.c @@ -41,8 +41,6 @@ void vmprof_ignore_signals(int ignored) { if (ignored) { - /* set the last bit, and wait until concurrently-running signal - handlers finish */ __sync_add_and_fetch(&signal_handler_ignore, 1L); while (signal_handler_entries != 0L) { usleep(1); @@ -370,7 +368,7 @@ 
goto error; if (install_sigprof_timer() == -1) goto error; - vmprof_ignore_signals(0); + signal_handler_ignore = 0; return 0; error: @@ -394,7 +392,7 @@ int vmprof_disable(void) { - vmprof_ignore_signals(1); + signal_handler_ignore = 1; vmprof_set_profile_interval_usec(0); #ifdef VMP_SUPPORTS_NATIVE_PROFILING disable_cpyprof(); diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.c b/rpython/rlib/rvmprof/src/shared/vmprof_win.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.c @@ -1,7 +1,7 @@ -// cannot include this header because it also has definitions -#include "windows.h" -#include "compat.h" -#include "vmp_stack.h" +#include "vmprof_win.h" + +volatile int thread_started = 0; +volatile int enabled = 0; HANDLE write_mutex; @@ -12,7 +12,20 @@ return 0; } -#include +int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, + int auto_retry) +{ + char buf[2048]; + long namelen; + + namelen = (long)strnlen(code_name, 1023); + buf[0] = MARKER_VIRTUAL_IP; + *(intptr_t*)(buf + 1) = code_uid; + *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; + memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); + vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); + return 0; +} int vmp_write_all(const char *buf, size_t bufsize) { @@ -40,3 +53,168 @@ return 0; } +HANDLE write_mutex; + +#include "vmprof_common.h" + +int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) +{ + HRESULT result; + HANDLE hThread; + int depth; + CONTEXT ctx; +#ifdef RPYTHON_LL2CTYPES + return 0; // not much we can do +#else +#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) + return 0; // we can't freeze threads, unsafe +#else + hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); + if (!hThread) { + return -1; + } + result = SuspendThread(hThread); + if(result == 0xffffffff) + return -1; // possible, e.g. attached debugger or thread alread suspended + // find the correct thread +#ifdef RPYTHON_VMPROF + ctx.ContextFlags = CONTEXT_FULL; + if (!GetThreadContext(hThread, &ctx)) + return -1; + depth = get_stack_trace(tstate->vmprof_tl_stack, + stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); + stack->depth = depth; + stack->stack[depth++] = thread_id; + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#else + depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, + MAX_STACK_DEPTH, 0, 0); + stack->depth = depth; + stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); + stack->count = 1; + stack->marker = MARKER_STACKTRACE; + ResumeThread(hThread); + return depth; +#endif + +#endif +#endif +} + +#ifndef RPYTHON_VMPROF +static +PY_WIN_THREAD_STATE * get_current_thread_state(void) +{ +#if PY_MAJOR_VERSION < 3 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#else + return _PyThreadState_UncheckedGet(); +#endif +} +#endif + +long __stdcall vmprof_mainloop(void *arg) +{ +#ifdef RPYTHON_LL2CTYPES + // for tests only + return 0; +#else + // it is not a test case! 
+ PY_WIN_THREAD_STATE *tstate; + HANDLE hThreadSnap = INVALID_HANDLE_VALUE; + prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); + int depth; +#ifndef RPYTHON_VMPROF + // cpython version + while (1) { + Sleep(vmprof_get_profile_interval_usec() * 1000); + if (!enabled) { + continue; + } + tstate = get_current_thread_state(); + if (!tstate) + continue; + depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); + } + } +#else + // pypy version + while (1) { + //Sleep(vmprof_get_profile_interval_usec() * 1000); + Sleep(10); + if (!enabled) { + continue; + } + _RPython_ThreadLocals_Acquire(); + tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head + tstate = _RPython_ThreadLocals_Enum(tstate); + while (tstate) { + if (tstate->ready == 42) { + depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); + if (depth > 0) { + vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), + depth * sizeof(void *) + + sizeof(struct prof_stacktrace_s) - + offsetof(struct prof_stacktrace_s, marker)); + } + } + tstate = _RPython_ThreadLocals_Enum(tstate); + } + _RPython_ThreadLocals_Release(); + } +#endif +#endif +} + +RPY_EXTERN +int vmprof_enable(int memory, int native, int real_time) +{ + if (!thread_started) { + if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { + return -1; + } + thread_started = 1; + } + enabled = 1; + return 0; +} + +RPY_EXTERN +int vmprof_disable(void) +{ + char marker = MARKER_TRAILER; + (void)vmp_write_time_now(MARKER_TRAILER); + + enabled = 0; + vmp_set_profile_fileno(-1); + return 0; +} + +RPY_EXTERN +void vmprof_ignore_signals(int ignored) +{ + enabled = !ignored; +} + +int vmp_native_enable(void) +{ + return 0; +} + +void vmp_native_disable(void) +{ +} + +int get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, + int max_depth, intptr_t pc) +{ + return 0; +} diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_win.h b/rpython/rlib/rvmprof/src/shared/vmprof_win.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_win.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_win.h @@ -3,20 +3,13 @@ #include "windows.h" #include "compat.h" #include "vmp_stack.h" - -HANDLE write_mutex; +#include int prepare_concurrent_bufs(void); -#include "vmprof_common.h" -#include - // This file has been inspired (but not copied from since the LICENSE // would not allow it) from verysleepy profiler -volatile int thread_started = 0; -volatile int enabled = 0; - int vmp_write_all(const char *buf, size_t bufsize); #ifdef RPYTHON_VMPROF @@ -26,178 +19,14 @@ #endif -RPY_EXTERN int vmprof_register_virtual_function(char *code_name, intptr_t code_uid, - int auto_retry) -{ - char buf[2048]; - long namelen; + int auto_retry); - namelen = (long)strnlen(code_name, 1023); - buf[0] = MARKER_VIRTUAL_IP; - *(intptr_t*)(buf + 1) = code_uid; - *(long*)(buf + 1 + sizeof(intptr_t)) = namelen; - memcpy(buf + 1 + sizeof(intptr_t) + sizeof(long), code_name, namelen); - vmp_write_all(buf, 1 + sizeof(intptr_t) + sizeof(long) + namelen); - return 0; -} - -int vmprof_snapshot_thread(DWORD thread_id, PY_WIN_THREAD_STATE *tstate, prof_stacktrace_s *stack) -{ - HRESULT result; - HANDLE hThread; - int depth; - CONTEXT ctx; -#ifdef RPYTHON_LL2CTYPES - return 0; // not much we can do -#else -#if !defined(RPY_TLOFS_thread_ident) && defined(RPYTHON_VMPROF) - return 0; // we can't freeze threads, unsafe -#else - hThread = 
OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id); - if (!hThread) { - return -1; - } - result = SuspendThread(hThread); - if(result == 0xffffffff) - return -1; // possible, e.g. attached debugger or thread alread suspended - // find the correct thread -#ifdef RPYTHON_VMPROF - ctx.ContextFlags = CONTEXT_FULL; - if (!GetThreadContext(hThread, &ctx)) - return -1; - depth = get_stack_trace(tstate->vmprof_tl_stack, - stack->stack, MAX_STACK_DEPTH-2, ctx.Eip); - stack->depth = depth; - stack->stack[depth++] = thread_id; - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#else - depth = vmp_walk_and_record_stack(tstate->frame, stack->stack, - MAX_STACK_DEPTH, 0, 0); - stack->depth = depth; - stack->stack[depth++] = (void*)((ULONG_PTR)thread_id); - stack->count = 1; - stack->marker = MARKER_STACKTRACE; - ResumeThread(hThread); - return depth; -#endif - -#endif -#endif -} - -#ifndef RPYTHON_VMPROF -static -PY_WIN_THREAD_STATE * get_current_thread_state(void) -{ -#if PY_MAJOR_VERSION < 3 - return _PyThreadState_Current; -#elif PY_VERSION_HEX < 0x03050200 - return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); -#else - return _PyThreadState_UncheckedGet(); -#endif -} -#endif - -long __stdcall vmprof_mainloop(void *arg) -{ -#ifdef RPYTHON_LL2CTYPES - // for tests only - return 0; -#else - // it is not a test case! - PY_WIN_THREAD_STATE *tstate; - HANDLE hThreadSnap = INVALID_HANDLE_VALUE; - prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE); - int depth; -#ifndef RPYTHON_VMPROF - // cpython version - while (1) { - Sleep(profile_interval_usec * 1000); - if (!enabled) { - continue; - } - tstate = get_current_thread_state(); - if (!tstate) - continue; - depth = vmprof_snapshot_thread(tstate->thread_id, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - SIZEOF_PROF_STACKTRACE + depth * sizeof(void*)); - } - } -#else - // pypy version - while (1) { - //Sleep(profile_interval_usec * 1000); - Sleep(10); - if (!enabled) { - continue; - } - _RPython_ThreadLocals_Acquire(); - tstate = _RPython_ThreadLocals_Head(); // the first one is one behind head - tstate = _RPython_ThreadLocals_Enum(tstate); - while (tstate) { - if (tstate->ready == 42) { - depth = vmprof_snapshot_thread(tstate->thread_ident, tstate, stack); - if (depth > 0) { - vmp_write_all((char*)stack + offsetof(prof_stacktrace_s, marker), - depth * sizeof(void *) + - sizeof(struct prof_stacktrace_s) - - offsetof(struct prof_stacktrace_s, marker)); - } - } - tstate = _RPython_ThreadLocals_Enum(tstate); - } - _RPython_ThreadLocals_Release(); - } -#endif -#endif -} - -RPY_EXTERN -int vmprof_enable(int memory, int native, int real_time) -{ - if (!thread_started) { - if (!CreateThread(NULL, 0, vmprof_mainloop, NULL, 0, NULL)) { - return -1; - } - thread_started = 1; - } - enabled = 1; - return 0; -} - -RPY_EXTERN -int vmprof_disable(void) -{ - char marker = MARKER_TRAILER; - (void)vmp_write_time_now(MARKER_TRAILER); - - enabled = 0; - vmp_set_profile_fileno(-1); - return 0; -} - -RPY_EXTERN -void vmprof_ignore_signals(int ignored) -{ - enabled = !ignored; -} - -int vmp_native_enable(void) { - return 0; -} - -void vmp_native_disable(void) { -} - +PY_WIN_THREAD_STATE * get_current_thread_state(void); +int vmprof_enable(int memory, int native, int real_time); +int vmprof_disable(void); +void vmprof_ignore_signals(int ignored); +int vmp_native_enable(void); +void vmp_native_disable(void); int 
get_stack_trace(PY_WIN_THREAD_STATE * current, void** result, - int max_depth, intptr_t pc) -{ - return 0; -} + int max_depth, intptr_t pc); diff --git a/rpython/rlib/rvmprof/test/test_file.py b/rpython/rlib/rvmprof/test/test_file.py --- a/rpython/rlib/rvmprof/test/test_file.py +++ b/rpython/rlib/rvmprof/test/test_file.py @@ -2,25 +2,43 @@ import urllib2, py from os.path import join +RVMPROF = py.path.local(__file__).join('..', '..') def github_raw_file(repo, path, branch='master'): - return "https://raw.githubusercontent.com/{repo}/{branch}/{path}".format(**dict( - repo=repo, path=path, branch=branch - )) + url = "https://raw.githubusercontent.com/{repo}/{branch}/{path}" + return url.format(repo=repo, path=path, branch=branch) +def get_list_of_files(shared): + files = list(shared.visit('*.[ch]')) + # in PyPy we checkin the result of ./configure; as such, these files are + # not in github and can be skipped + files.remove(shared.join('libbacktrace', 'config-x86_32.h')) + files.remove(shared.join('libbacktrace', 'config-x86_64.h')) + files.remove(shared.join('libbacktrace', 'gstdint.h')) + return files def test_same_file(): - for root, dirs, files in os.walk('rpython/rlib/rvmprof/src/shared'): - for file in files: - if not (file.endswith(".c") or file.endswith(".h")): - continue - url = github_raw_file("vmprof/vmprof-python", "src/%s" % file) - source = urllib2.urlopen(url).read() - # - dest = py.path.local(join(root, file)).read() - if source != dest: - raise AssertionError("%s was updated, but changes were" - "not copied over to PyPy" % url) - else: - print("%s matches" % url) - break # do not walk dirs + shared = RVMPROF.join('src', 'shared') + files = get_list_of_files(shared) + assert files, 'cannot find any C file, probably the directory is wrong?' 
+ no_matches = [] + print + for file in files: + path = file.relto(shared) + url = github_raw_file("vmprof/vmprof-python", "src/%s" % path) + source = urllib2.urlopen(url).read() + dest = file.read() + shortname = file.relto(RVMPROF) + if source == dest: + print '%s matches' % shortname + else: + print '%s does NOT match' % shortname + no_matches.append(file) + # + if no_matches: + print + print 'The following file dit NOT match' + for f in no_matches: + print ' ', f.relto(RVMPROF) + raise AssertionError("some files were updated on github, " + "but were not copied here") diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -1,214 +1,183 @@ import py, os +import pytest +import time from rpython.tool.udir import udir from rpython.rlib import rvmprof from rpython.translator.c.test.test_genc import compile -from rpython.rlib.objectmodel import we_are_translated -from rpython.rlib.nonconst import NonConstant from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.rtyper.lltypesystem import rffi, lltype + at pytest.mark.usefixtures('init') +class RVMProfTest(object): -def test_vmprof_execute_code_1(): + ENTRY_POINT_ARGS = () - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + class MyCode(object): + def __init__(self, name='py:code:0:noname'): + self.name = name - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): + def get_name(self): + return self.name + + @pytest.fixture + def init(self): + self.register() + self.rpy_entry_point = compile(self.entry_point, self.ENTRY_POINT_ARGS) + + def register(self): + rvmprof.register_code_object_class(self.MyCode, + self.MyCode.get_name) + + +class TestExecuteCode(RVMProfTest): + + def entry_point(self): + res = self.main(self.MyCode(), 5) + assert res == 42 + return 0 + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): print num return 42 - def f(): - res = main(MyCode(), 5) + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestResultClass(RVMProfTest): + + class A: pass + + @rvmprof.vmprof_execute_code("xcode2", lambda self, num, code: code, + result_class=A) + def main(self, num, code): + print num + return self.A() + + def entry_point(self): + a = self.main(7, self.MyCode()) + assert isinstance(a, self.A) + return 0 + + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 + + +class TestRegisterCode(RVMProfTest): + + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, num: code) + def main(self, code, num): + print num + return 42 + + def entry_point(self): + code = self.MyCode() + rvmprof.register_code(code, lambda code: 'some code') + res = self.main(code, 5) assert res == 42 return 0 - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 + def test(self): + assert self.entry_point() == 0 + assert self.rpy_entry_point() == 0 -def test_vmprof_execute_code_2(): +class RVMProfSamplingTest(RVMProfTest): - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported: - pass + # the kernel will deliver SIGPROF at max 250 Hz. 
See also + # https://github.com/vmprof/vmprof-python/issues/163 + SAMPLING_INTERVAL = 1/250.0 - class A: - pass + @pytest.fixture + def init(self, tmpdir): + self.tmpdir = tmpdir + self.tmpfile = tmpdir.join('profile.vmprof') + self.tmpfilename = str(self.tmpfile) + super(RVMProfSamplingTest, self).init() - @rvmprof.vmprof_execute_code("xcode2", lambda num, code: code, - result_class=A) - def main(num, code): - print num - return A() + ENTRY_POINT_ARGS = (int, float) + def entry_point(self, value, delta_t): + code = self.MyCode('py:code:52:test_enable') + rvmprof.register_code(code, self.MyCode.get_name) + fd = os.open(self.tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) + rvmprof.enable(fd, self.SAMPLING_INTERVAL) + start = time.time() + res = 0 + while time.time() < start+delta_t: + res = self.main(code, value) + rvmprof.disable() + os.close(fd) + return res - def f(): - a = main(7, MyCode()) - assert isinstance(a, A) - return 0 + def approx_equal(self, a, b, tolerance=0.1): + max_diff = (a+b)/2.0 * tolerance + return abs(a-b) < max_diff - assert f() == 0 - fn = compile(f, []) - assert fn() == 0 +class TestEnable(RVMProfSamplingTest): -def test_register_code(): - - class MyCode: - pass - try: - rvmprof.register_code_object_class(MyCode, lambda code: 'some code') - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num - return 42 - - def f(): - code = MyCode() - rvmprof.register_code(code, lambda code: 'some code') - res = main(code, 5) - assert res == 42 - return 0 - - assert f() == 0 - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - - -def test_enable(): - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - print num + @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) + def main(self, code, count): s = 0 - for i in range(num): + for i in range(count): s += (i << 1) - if s % 2123423423 == 0: - print s return s - tmpfilename = str(udir.join('test_rvmprof')) + def test(self): + from vmprof import read_profile + assert self.entry_point(10**4, 0.1) == 99990000 + assert self.tmpfile.check() + self.tmpfile.remove() + # + assert self.rpy_entry_point(10**4, 0.5) == 99990000 + assert self.tmpfile.check() + prof = read_profile(self.tmpfilename) + tree = prof.get_tree() + assert tree.name == 'py:code:52:test_enable' + assert self.approx_equal(tree.count, 0.5/self.SAMPLING_INTERVAL) - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_WRONLY | os.O_CREAT, 0666) - if we_are_translated(): - num = 100000000 - period = 0.0001 + +class TestNative(RVMProfSamplingTest): + + @pytest.fixture + def init(self, tmpdir): + eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + separate_module_sources=[""" + RPY_EXTERN int native_func(int d) { + int j = 0; + if (d > 0) { + return native_func(d-1); + } else { + for (int i = 0; i < 42000; i++) { + j += 1; + } + } + return j; + } + """]) + self.native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, + compilation_info=eci) + super(TestNative, self).init(tmpdir) + + @rvmprof.vmprof_execute_code("xcode1", lambda self, 
code, count: code) + def main(self, code, count): + if count > 0: + return self.main(code, count-1) else: - num = 10000 - period = 0.9 - rvmprof.enable(fd, period) - res = main(code, num) - #assert res == 499999500000 - rvmprof.disable() - os.close(fd) - return 0 + return self.native_func(100) - def check_profile(filename): - from vmprof import read_profile - - prof = read_profile(filename) - assert prof.get_tree().name.startswith("py:") - assert prof.get_tree().count - - assert f() == 0 - assert os.path.exists(tmpfilename) - fn = compile(f, [], gcpolicy="minimark") - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - -def test_native(): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], - separate_module_sources=[""" - RPY_EXTERN int native_func(int d) { - int j = 0; - if (d > 0) { - return native_func(d-1); - } else { - for (int i = 0; i < 42000; i++) { - j += d; - } - } - return j; - } - """]) - - native_func = rffi.llexternal("native_func", [rffi.INT], rffi.INT, - compilation_info=eci) - - class MyCode: - pass - def get_name(code): - return 'py:code:52:x' - - try: - rvmprof.register_code_object_class(MyCode, get_name) - except rvmprof.VMProfPlatformUnsupported as e: - py.test.skip(str(e)) - - @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) - def main(code, num): - if num > 0: - return main(code, num-1) - else: - return native_func(100) - - tmpfilename = str(udir.join('test_rvmprof')) - - def f(): - if NonConstant(False): - # Hack to give os.open() the correct annotation - os.open('foo', 1, 1) - code = MyCode() - rvmprof.register_code(code, get_name) - fd = os.open(tmpfilename, os.O_RDWR | os.O_CREAT, 0666) - num = 10000 - period = 0.0001 - rvmprof.enable(fd, period, native=1) - for i in range(num): - res = main(code, 3) - rvmprof.disable() - os.close(fd) - return 0 - - def check_profile(filename): + def test(self): + # XXX: this test is known to fail since rev a4f077ba651c, but buildbot + # never ran it. FIXME. 
from vmprof import read_profile from vmprof.show import PrettyPrinter - - prof = read_profile(filename) + assert self.rpy_entry_point(3, 0.5) == 42000 + assert self.tmpfile.check() + # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() p = PrettyPrinter() p._print_tree(tree) @@ -227,16 +196,3 @@ del not_found[i] break assert not_found == [] - - fn = compile(f, [], gcpolicy="incminimark", lldebug=True) - assert fn() == 0 - try: - import vmprof - except ImportError: - py.test.skip("vmprof unimportable") - else: - check_profile(tmpfilename) - finally: - assert os.path.exists(tmpfilename) - os.unlink(tmpfilename) - diff --git a/rpython/translator/c/test/test_standalone.py b/rpython/translator/c/test/test_standalone.py --- a/rpython/translator/c/test/test_standalone.py +++ b/rpython/translator/c/test/test_standalone.py @@ -1102,22 +1102,6 @@ assert out.strip() == 'ok' -class TestMaemo(TestStandalone): - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - from rpython.translator.platform.maemo import check_scratchbox - check_scratchbox() - config = get_combined_translation_config(translating=True) - config.translation.platform = 'maemo' - cls.config = config - - def test_profopt(self): - py.test.skip("Unsupported") - - def test_prof_inline(self): - py.test.skip("Unsupported") - - class TestThread(object): gcrootfinder = 'shadowstack' config = None diff --git a/rpython/translator/platform/__init__.py b/rpython/translator/platform/__init__.py --- a/rpython/translator/platform/__init__.py +++ b/rpython/translator/platform/__init__.py @@ -320,24 +320,16 @@ else: host_factory = Cygwin64 else: - # pray - from rpython.translator.platform.distutils_platform import DistutilsPlatform - host_factory = DistutilsPlatform + raise ValueError('unknown sys.platform "%s"', sys.platform) platform = host = host_factory() def pick_platform(new_platform, cc): if new_platform == 'host': return host_factory(cc) - elif new_platform == 'maemo': - from rpython.translator.platform.maemo import Maemo - return Maemo(cc) elif new_platform == 'arm': from rpython.translator.platform.arm import ARM return ARM(cc) - elif new_platform == 'distutils': - from rpython.translator.platform.distutils_platform import DistutilsPlatform - return DistutilsPlatform() else: raise ValueError("platform = %s" % (new_platform,)) diff --git a/rpython/translator/platform/distutils_platform.py b/rpython/translator/platform/distutils_platform.py deleted file mode 100644 --- a/rpython/translator/platform/distutils_platform.py +++ /dev/null @@ -1,157 +0,0 @@ -import py, os, sys - -from rpython.translator.platform import Platform, log, CompilationError -from rpython.translator.tool import stdoutcapture - -def log_spawned_cmd(spawn): - def spawn_and_log(cmd, *args, **kwds): - log.execute(' '.join(cmd)) - return spawn(cmd, *args, **kwds) - return spawn_and_log - -CFLAGS = ['-O3'] - -if os.name != 'nt': - so_ext = 'so' -else: - so_ext = 'dll' - -class DistutilsPlatform(Platform): - """ This is a generic distutils platform. 
I hope it'll go away at some - point soon completely - """ - name = "distutils" - so_ext = so_ext - - def __init__(self, cc=None): - self.cc = cc - if self.name == "distutils": - self.name = sys.platform - - def _ensure_correct_math(self): - if self.name != 'win32': - return # so far - from distutils import sysconfig - gcv = sysconfig.get_config_vars() - opt = gcv.get('OPT') # not always existent - if opt and '/Op' not in opt: - opt += '/Op' - gcv['OPT'] = opt - - def compile(self, cfilenames, eci, outputfilename=None, standalone=True): - self._ensure_correct_math() - self.cfilenames = cfilenames - if standalone: - ext = '' - else: - ext = so_ext - self.standalone = standalone - self.libraries = list(eci.libraries) - self.include_dirs = list(eci.include_dirs) - self.library_dirs = list(eci.library_dirs) - self.compile_extra = list(eci.compile_extra) - self.link_extra = list(eci.link_extra) - self.frameworks = list(eci.frameworks) - if not self.name in ('win32', 'darwin', 'cygwin'): # xxx - if 'm' not in self.libraries: - self.libraries.append('m') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - if 'pthread' not in self.libraries: - self.libraries.append('pthread') - if self.name != 'sunos5': - self.compile_extra += ['-pthread'] - self.link_extra += ['-pthread'] - else: - self.compile_extra += ['-pthreads'] - self.link_extra += ['-lpthread'] - if self.name == 'win32': - self.link_extra += ['/DEBUG'] # generate .pdb file - if self.name == 'darwin': - # support Fink & Darwinports - for s in ('/sw/', '/opt/local/'): - if s + 'include' not in self.include_dirs and \ - os.path.exists(s + 'include'): - self.include_dirs.append(s + 'include') - if s + 'lib' not in self.library_dirs and \ - os.path.exists(s + 'lib'): - self.library_dirs.append(s + 'lib') - self.compile_extra += CFLAGS + ['-fomit-frame-pointer'] - for framework in self.frameworks: - self.link_extra += ['-framework', framework] - - if outputfilename is None: - self.outputfilename = py.path.local(cfilenames[0]).new(ext=ext) - else: - self.outputfilename = py.path.local(outputfilename) - self.eci = eci - import distutils.errors - basename = self.outputfilename.new(ext='') - data = '' - try: - saved_environ = os.environ.copy() - c = stdoutcapture.Capture(mixed_out_err=True) - try: - self._build() - finally: - # workaround for a distutils bugs where some env vars can - # become longer and longer every time it is used - for key, value in saved_environ.items(): - if os.environ.get(key) != value: - os.environ[key] = value - foutput, foutput = c.done() - data = foutput.read() - if data: - fdump = basename.new(ext='errors').open("wb") - fdump.write(data) - fdump.close() - except (distutils.errors.CompileError, - distutils.errors.LinkError): - raise CompilationError('', data) - except: - print >>sys.stderr, data - raise - return self.outputfilename - - def _build(self): - from distutils.ccompiler import new_compiler - from distutils import sysconfig - compiler = new_compiler(force=1) - if self.cc is not None: - for c in '''compiler compiler_so compiler_cxx - linker_exe linker_so'''.split(): - compiler.executables[c][0] = self.cc - if not self.standalone: - sysconfig.customize_compiler(compiler) # XXX - compiler.spawn = log_spawned_cmd(compiler.spawn) - objects = [] - for cfile in self.cfilenames: - cfile = py.path.local(cfile) - compile_extra = self.compile_extra[:] - - old = cfile.dirpath().chdir() - try: - res = compiler.compile([cfile.basename], - include_dirs=self.eci.include_dirs, - extra_preargs=compile_extra) - assert 
len(res) == 1 - cobjfile = py.path.local(res[0]) - assert cobjfile.check() - objects.append(str(cobjfile)) - finally: - old.chdir() - - if self.standalone: - cmd = compiler.link_executable - else: - cmd = compiler.link_shared_object - cmd(objects, str(self.outputfilename), - libraries=self.eci.libraries, - extra_preargs=self.link_extra, - library_dirs=self.eci.library_dirs) - - def _include_dirs_for_libffi(self): - return ['/usr/include/libffi'] - - def _library_dirs_for_libffi(self): - return ['/usr/lib/libffi'] - diff --git a/rpython/translator/platform/maemo.py b/rpython/translator/platform/maemo.py deleted file mode 100644 --- a/rpython/translator/platform/maemo.py +++ /dev/null @@ -1,95 +0,0 @@ -"""Support for Maemo.""" - -import py, os - -from rpython.tool.udir import udir -from rpython.translator.platform import ExecutionResult, log -from rpython.translator.platform.linux import Linux -from rpython.translator.platform.posix import GnuMakefile, _run_subprocess - -def check_scratchbox(): - # in order to work, that file must exist and be executable by us - if not os.access('/scratchbox/login', os.X_OK): - py.test.skip("No scratchbox detected") - -class Maemo(Linux): - name = "maemo" - - available_includedirs = ('/usr/include', '/tmp') - copied_cache = {} - - def _invent_new_name(self, basepath, base): - pth = basepath.join(base) - num = 0 - while pth.check(): - pth = basepath.join('%s_%d' % (base,num)) - num += 1 - return pth.ensure(dir=1) - - def _copy_files_to_new_dir(self, dir_from, pattern='*.[ch]'): - try: - return self.copied_cache[dir_from] - except KeyError: - new_dirpath = self._invent_new_name(udir, 'copied_includes') - files = py.path.local(dir_from).listdir(pattern) - for f in files: - f.copy(new_dirpath) - # XXX - srcdir = py.path.local(dir_from).join('src') - if srcdir.check(dir=1): - target = new_dirpath.join('src').ensure(dir=1) - for f in srcdir.listdir(pattern): - f.copy(target) - # XXX - self.copied_cache[dir_from] = new_dirpath - return new_dirpath - - def _preprocess_include_dirs(self, include_dirs): - """ Tweak includedirs so they'll be available through scratchbox - """ - res_incl_dirs = [] - for incl_dir in include_dirs: - incl_dir = py.path.local(incl_dir) - for available in self.available_includedirs: - if incl_dir.relto(available): - res_incl_dirs.append(str(incl_dir)) - break - else: - # we need to copy files to a place where it's accessible - res_incl_dirs.append(self._copy_files_to_new_dir(incl_dir)) - return res_incl_dirs - - def _execute_c_compiler(self, cc, args, outname): - log.execute('/scratchbox/login ' + cc + ' ' + ' '.join(args)) - args = [cc] + args - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args) - self._handle_error(returncode, stdout, stderr, outname) - - def execute(self, executable, args=[], env=None): - if isinstance(args, str): - args = str(executable) + ' ' + args - log.message('executing /scratchbox/login ' + args) - else: - args = [str(executable)] + args - log.message('executing /scratchbox/login ' + ' '.join(args)) - returncode, stdout, stderr = _run_subprocess('/scratchbox/login', args, - env) - return ExecutionResult(returncode, stdout, stderr) - - def _include_dirs_for_libffi(self): - # insanely obscure dir - return ['/usr/include/arm-linux-gnueabi/'] - - def _library_dirs_for_libffi(self): - # on the other hand, library lands in usual place... 
- return [] - - def execute_makefile(self, path_to_makefile, extra_opts=[]): - if isinstance(path_to_makefile, GnuMakefile): - path = path_to_makefile.makefile_dir - else: - path = path_to_makefile - log.execute('make %s in %s' % (" ".join(extra_opts), path)) - returncode, stdout, stderr = _run_subprocess( - '/scratchbox/login', ['make', '-C', str(path)] + extra_opts) - self._handle_error(returncode, stdout, stderr, path.join('make')) diff --git a/rpython/translator/platform/test/test_distutils.py b/rpython/translator/platform/test/test_distutils.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_distutils.py +++ /dev/null @@ -1,17 +0,0 @@ - -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.platform.distutils_platform import DistutilsPlatform -import py - -class TestDistutils(BasicTest): - platform = DistutilsPlatform() - - def test_nice_errors(self): - py.test.skip("Unsupported") - - def test_900_files(self): - py.test.skip('Makefiles not suppoerted') - - def test_precompiled_headers(self): - py.test.skip('Makefiles not suppoerted') - diff --git a/rpython/translator/platform/test/test_maemo.py b/rpython/translator/platform/test/test_maemo.py deleted file mode 100644 --- a/rpython/translator/platform/test/test_maemo.py +++ /dev/null @@ -1,37 +0,0 @@ - -""" File containing maemo platform tests -""" - -import py -from rpython.tool.udir import udir -from rpython.translator.platform.maemo import Maemo, check_scratchbox -from rpython.translator.platform.test.test_platform import TestPlatform as BasicTest -from rpython.translator.tool.cbuild import ExternalCompilationInfo - -class TestMaemo(BasicTest): - platform = Maemo() - strict_on_stderr = False - - def setup_class(cls): - py.test.skip("TestMaemo: tests skipped for now") - check_scratchbox() - - def test_includes_outside_scratchbox(self): - cfile = udir.join('test_includes_outside_scratchbox.c') - cfile.write(''' - #include - #include "test.h" - int main() - { - printf("%d\\n", XXX_STUFF); - return 0; - } - ''') - includedir = py.path.local(__file__).dirpath().join('include') - eci = ExternalCompilationInfo(include_dirs=(includedir,)) - executable = self.platform.compile([cfile], eci) - res = self.platform.execute(executable) - self.check_res(res) - - def test_environment_inheritance(self): - py.test.skip("FIXME") diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -147,10 +147,13 @@ def test_is_host_build(): + from platform import machine from rpython.translator import platform assert platform.host == platform.platform assert platform.is_host_build() - platform.set_platform('maemo', None) - assert platform.host != platform.platform - assert not platform.is_host_build() + # do we support non-host builds? 
+ if machine().startswith('arm'): + platform.set_platform('arm', None) + assert platform.host != platform.platform + assert not platform.is_host_build() diff --git a/rpython/translator/platform/test/test_posix.py b/rpython/translator/platform/test/test_posix.py --- a/rpython/translator/platform/test/test_posix.py +++ b/rpython/translator/platform/test/test_posix.py @@ -64,10 +64,3 @@ assert 'INCLUDEDIRS = %s/foo/baz/include' % include_prefix in Makefile assert 'LIBDIRS = %s/foo/baz/lib' % lib_prefix in Makefile -class TestMaemo(TestMakefile): - strict_on_stderr = False - - def setup_class(cls): - from rpython.translator.platform.maemo import check_scratchbox, Maemo - check_scratchbox() - cls.platform = Maemo() diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -141,6 +141,9 @@ if isinstance(func, FunctionGraph): return func result = [] + if hasattr(func, 'im_func'): + # make it possible to translate bound methods + func = func.im_func for graph in translator.graphs: if getattr(graph, 'func', None) is func: result.append(graph) From pypy.commits at gmail.com Mon Nov 13 16:10:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:10:06 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Return True from inspect.isbuiltin() for functions implemented in C Message-ID: <5a0a0a2e.4eb6df0a.d7dd1.2f91@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93007:06fb68203b17 Date: 2017-11-13 21:09 +0000 http://bitbucket.org/pypy/pypy/changeset/06fb68203b17/ Log: Return True from inspect.isbuiltin() for functions implemented in C diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -49,6 +49,10 @@ import builtins from operator import attrgetter from collections import namedtuple, OrderedDict +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # Create constants for the compiler flags in Include/code.h # We try to get them from dis to avoid duplication @@ -262,7 +266,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" From pypy.commits at gmail.com Mon Nov 13 16:33:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 13 Nov 2017 13:33:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip (parts of) tests that require _pickle Message-ID: <5a0a0f97.26acdf0a.6d561.7b3a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93008:fc81c46b1987 Date: 2017-11-13 21:32 +0000 http://bitbucket.org/pypy/pypy/changeset/fc81c46b1987/ Log: Skip (parts of) tests that require _pickle diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -32,6 +32,8 @@ from test.support import check_impl_detail from test.test_import import _ready_to_import +if check_impl_detail(): + import _pickle # Functions tested in this suite: @@ -755,12 +757,12 @@ @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_getfullargspec_builtin_methods(self): - import _pickle - 
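
For reference, a minimal app-level sketch of the gc.get_stats() helper introduced
in the memory-accounting changeset r93011 above. The attribute names and the
repr() method are taken from the pypy/module/gc/app_referents.py diff; the call
itself is only available on a PyPy build that includes that branch, so treat this
as an illustration rather than documented API:

    import gc

    stats = gc.get_stats()
    # repr() is an ordinary method on GcStats, not __repr__
    print(stats.repr())
    # individual fields come back pre-formatted as "...kB"/"...MB" strings
    print(stats.total_gc_memory)      # memory currently used by the GC
    print(stats.jit_backend_used)     # raw assembler used by the JIT
    print(stats.memory_used_sum)      # GC used + assembler + memory pressure

Returning pre-formatted strings rather than raw byte counts keeps the app-level
wrapper trivial; the raw integer values are still reachable through the
interp-level gc._get_stats() object that GcStats wraps.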
self.assertFullArgSpecEquals(_pickle.Pickler.dump, - args_e=['self', 'obj'], formatted='(self, obj)') - - self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, - args_e=['self', 'obj'], formatted='(self, obj)') + if check_impl_detail(): + self.assertFullArgSpecEquals(_pickle.Pickler.dump, + args_e=['self', 'obj'], formatted='(self, obj)') + + self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, + args_e=['self', 'obj'], formatted='(self, obj)') self.assertFullArgSpecEquals( os.stat, @@ -1961,7 +1963,6 @@ "Signature information for builtins requires docstrings") def test_signature_on_builtins(self): import _testcapi - import _pickle def test_unbound_method(o): """Use this to test unbound methods (things that should have a self)""" @@ -1995,9 +1996,10 @@ # normal method # (PyMethodDescr_Type, "method_descriptor") - test_unbound_method(_pickle.Pickler.dump) - d = _pickle.Pickler(io.StringIO()) - test_callable(d.dump) + if check_impl_detail(): + test_unbound_method(_pickle.Pickler.dump) + d = _pickle.Pickler(io.StringIO()) + test_callable(d.dump) # static method test_callable(str.maketrans) @@ -2627,10 +2629,10 @@ with self.assertRaisesRegex(ValueError, "callable.*is not supported"): self.assertEqual(inspect.signature(D), None) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_on_builtin_class(self): - import _pickle self.assertEqual(str(inspect.signature(_pickle.Pickler)), '(file, protocol=None, fix_imports=True)') @@ -2876,10 +2878,10 @@ foo_sig = MySignature.from_callable(foo) self.assertTrue(isinstance(foo_sig, MySignature)) + @cpython_only @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") def test_signature_from_callable_builtin_obj(self): - import _pickle class MySignature(inspect.Signature): pass sig = MySignature.from_callable(_pickle.Pickler) self.assertTrue(isinstance(sig, MySignature)) From pypy.commits at gmail.com Mon Nov 13 17:45:56 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 13 Nov 2017 14:45:56 -0800 (PST) Subject: [pypy-commit] pypy default: Merged in tpruzina/pypy (pull request #581) Message-ID: <5a0a20a4.51bbdf0a.dbb51.7cf7@mx.google.com> Author: Armin Rigo Branch: Changeset: r93010:27b914ed1ea1 Date: 2017-11-13 22:45 +0000 http://bitbucket.org/pypy/pypy/changeset/27b914ed1ea1/ Log: Merged in tpruzina/pypy (pull request #581) fix detect_pax behavior on linux if procfs is mounted with hidepid>=1 Approved-by: Vadim A. Misbakh-Soloviov diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. 
""" if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True From pypy.commits at gmail.com Mon Nov 13 17:46:06 2017 From: pypy.commits at gmail.com (=?utf-8?b?VG9tw6HFoSBQcnXFvmluYSA8bm9yZXBseUBidWlsZGJvdC5weXB5Lm9y?= =?utf-8?q?g=3E?=) Date: Mon, 13 Nov 2017 14:46:06 -0800 (PST) Subject: [pypy-commit] pypy default: fix detect_pax behavior on linux where procfs is mounted with hidepid>=1 Message-ID: <5a0a20ae.83c4df0a.708b4.1e10@mx.google.com> Author: Tomáš Pružina Branch: Changeset: r93009:368d2eef1229 Date: 2017-11-05 05:46 +0100 http://bitbucket.org/pypy/pypy/changeset/368d2eef1229/ Log: fix detect_pax behavior on linux where procfs is mounted with hidepid>=1 PID1 (init) isn't observable on systems with procfs mounted with hidepid=1,2 unless build runs under root (for example on Gentoo where package manager compiles under user 'portage'). This can be fixed by replacing /proc/1/status with /proc/self/status (which is visible to the build script). diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. """ if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True From pypy.commits at gmail.com Tue Nov 14 04:20:23 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 14 Nov 2017 01:20:23 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: add tracking of memory Message-ID: <5a0ab557.49c71c0a.fc848.8678@mx.google.com> Author: fijal Branch: memory-accounting Changeset: r93011:5e198814c5f6 Date: 2017-11-14 10:19 +0100 http://bitbucket.org/pypy/pypy/changeset/5e198814c5f6/ Log: add tracking of memory diff --git a/pypy/module/gc/__init__.py b/pypy/module/gc/__init__.py --- a/pypy/module/gc/__init__.py +++ b/pypy/module/gc/__init__.py @@ -19,6 +19,7 @@ space.config.translation.gctransformer == "framework"): self.appleveldefs.update({ 'dump_rpy_heap': 'app_referents.dump_rpy_heap', + 'get_stats': 'app_referents.get_stats', }) self.interpleveldefs.update({ 'get_rpy_roots': 'referents.get_rpy_roots', @@ -28,7 +29,7 @@ 'get_objects': 'referents.get_objects', 'get_referents': 'referents.get_referents', 'get_referrers': 'referents.get_referrers', - 'get_stats': 'referents.get_stats', + '_get_stats': 'referents.get_stats', '_dump_rpy_heap': 'referents._dump_rpy_heap', 'get_typeids_z': 'referents.get_typeids_z', 'get_typeids_list': 'referents.get_typeids_list', diff --git a/pypy/module/gc/app_referents.py b/pypy/module/gc/app_referents.py --- a/pypy/module/gc/app_referents.py +++ b/pypy/module/gc/app_referents.py @@ -48,3 +48,42 @@ file.flush() fd = file.fileno() gc._dump_rpy_heap(fd) + +class GcStats(object): + def __init__(self, s): + self._s = s + for item in ('total_gc_memory', 'jit_backend_used', 'total_memory_pressure', + 'total_allocated_memory', 'jit_backend_allocated'): + setattr(self, item, self._format(getattr(self._s, item))) + self.memory_used_sum = self._format(self._s.total_gc_memory + self._s.total_memory_pressure + + self._s.jit_backend_used) + self.memory_allocated_sum = self._format(self._s.total_allocated_memory + 
self._s.total_memory_pressure + + self._s.jit_backend_allocated) + + def _format(self, v): + if v < 1000000: + # bit unlikely ;-) + return "%.1fkB" % (v / 1024.) + return "%.1fMB" % (v / 1024. / 1024.) + + def repr(self): + return """Total memory consumed: +GC used: %s +raw assembler used: %s +memory pressure: %s +----------------------------- +Total: %s + +Total memory allocated: +GC allocated: %s +raw assembler allocated: %s +memory pressure: %s +----------------------------- +Total: %s +""" % (self.total_gc_memory, self.jit_backend_used, self.total_memory_pressure, + self.memory_used_sum, + self.total_allocated_memory, self.jit_backend_allocated, self.total_memory_pressure, + self.memory_allocated_sum) + +def get_stats(): + return GcStats(gc._get_stats()) diff --git a/pypy/module/gc/referents.py b/pypy/module/gc/referents.py --- a/pypy/module/gc/referents.py +++ b/pypy/module/gc/referents.py @@ -1,7 +1,7 @@ -from rpython.rlib import rgc +from rpython.rlib import rgc, jit_hooks from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.typedef import TypeDef, interp_attrproperty -from pypy.interpreter.gateway import unwrap_spec +from pypy.interpreter.gateway import unwrap_spec, interp2app from pypy.interpreter.error import oefmt, wrap_oserror from rpython.rlib.objectmodel import we_are_translated @@ -175,12 +175,21 @@ def __init__(self): self.total_memory_pressure = rgc.get_stats(rgc.TOTAL_MEMORY_PRESSURE) self.total_gc_memory = rgc.get_stats(rgc.TOTAL_MEMORY) + self.total_allocated_memory = rgc.get_stats(rgc.TOTAL_ALLOCATED_MEMORY) + self.jit_backend_allocated = jit_hooks.stats_asmmemmgr_allocated(None) + self.jit_backend_used = jit_hooks.stats_asmmemmgr_used(None) W_GcStats.typedef = TypeDef("GcStats", total_memory_pressure=interp_attrproperty("total_memory_pressure", cls=W_GcStats, wrapfn="newint"), total_gc_memory=interp_attrproperty("total_gc_memory", - cls=W_GcStats, wrapfn="newint") + cls=W_GcStats, wrapfn="newint"), + total_allocated_memory=interp_attrproperty("total_allocated_memory", + cls=W_GcStats, wrapfn="newint"), + jit_backend_allocated=interp_attrproperty("jit_backend_allocated", + cls=W_GcStats, wrapfn="newint"), + jit_backend_used=interp_attrproperty("jit_backend_used", + cls=W_GcStats, wrapfn="newint"), ) def get_stats(space): diff --git a/rpython/memory/gc/incminimark.py b/rpython/memory/gc/incminimark.py --- a/rpython/memory/gc/incminimark.py +++ b/rpython/memory/gc/incminimark.py @@ -1184,6 +1184,11 @@ """ return self.ac.total_memory_used + self.rawmalloced_total_size + def get_total_memory_alloced(self): + """ Return the total memory allocated + """ + return self.ac.total_memory_alloced + self.rawmalloced_total_size + def threshold_reached(self, extra=0): return (self.next_major_collection_threshold - float(self.get_total_memory_used())) < float(extra) @@ -2925,7 +2930,7 @@ if stats_no == rgc.TOTAL_MEMORY: return intmask(self.get_total_memory_used() + self.nursery_size) elif stats_no == rgc.TOTAL_ALLOCATED_MEMORY: - return 0 + return intmask(self.get_total_memory_alloced() + self.nursery_size) elif stats_no == rgc.TOTAL_MEMORY_PRESSURE: return inspector.count_memory_pressure(self) return 0 diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -294,6 +294,7 @@ # 'arena_base' points to the start of malloced memory; it might not # be a page-aligned address arena_base = llarena.arena_malloc(self.arena_size, False) + self.total_memory_alloced += 
self.arena_size if not arena_base: out_of_memory("out of memory: couldn't allocate the next arena") arena_end = arena_base + self.arena_size @@ -398,6 +399,7 @@ # The whole arena is empty. Free it. llarena.arena_reset(arena.base, self.arena_size, 4) llarena.arena_free(arena.base) + self.total_memory_alloced -= self.arena_size lltype.free(arena, flavor='raw', track_allocation=False) # else: From pypy.commits at gmail.com Tue Nov 14 05:19:03 2017 From: pypy.commits at gmail.com (stian) Date: Tue, 14 Nov 2017 02:19:03 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Don't return a copy on long // 1 Message-ID: <5a0ac317.26acdf0a.6d561.ad34@mx.google.com> Author: stian Branch: math-improvements Changeset: r93012:9838b9ca2938 Date: 2017-11-14 11:18 +0100 http://bitbucket.org/pypy/pypy/changeset/9838b9ca2938/ Log: Don't return a copy on long // 1 diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -788,8 +788,8 @@ if self.sign == 1 and other.numdigits() == 1 and other.sign == 1: digit = other.digit(0) if digit == 1: - return rbigint(self._digits[:self.numdigits()], 1, self.numdigits()) - elif digit and digit & (digit - 1) == 0: + return self + elif digit & (digit - 1) == 0: return self.rqshift(ptwotable[digit]) div, mod = _divrem(self, other) From pypy.commits at gmail.com Tue Nov 14 11:03:46 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:03:46 -0800 (PST) Subject: [pypy-commit] pypy py3.5: backout b95f1240ad90: this was fixed in CPython 3.* Message-ID: <5a0b13e2.8dbbdf0a.7319.a330@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93013:714cdd09fc99 Date: 2017-11-14 16:03 +0000 http://bitbucket.org/pypy/pypy/changeset/714cdd09fc99/ Log: backout b95f1240ad90: this was fixed in CPython 3.* diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -87,17 +87,13 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if it.skip(pygram.tokens.STRING): + it.skip_newlines() - docstring_possible = True - while True: - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if docstring_possible and it.skip(pygram.tokens.STRING): - it.skip_newlines() - docstring_possible = False - if not (it.skip_name("from") and + while (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): - break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -193,13 +193,3 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == 0 - -def test_future_doc_future(): - # for some reason people do this :-[ - s = ''' -from __future__ import generators -"Docstring" -from __future__ import division - ''' - f = run(s, (4, 24)) - assert f == 0 From pypy.commits at gmail.com Tue Nov 14 11:31:52 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:31:52 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Allow inspect._signature_from_callable() to work on builtins (by handling them like user functions) Message-ID: <5a0b1a78.17711c0a.8005c.71ef@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93014:5d5dfbb116aa Date: 2017-11-14 16:31 +0000 http://bitbucket.org/pypy/pypy/changeset/5d5dfbb116aa/ Log: Allow inspect._signature_from_callable() to work on builtins (by handling them like user functions) diff --git a/lib-python/3/inspect.py b/lib-python/3/inspect.py --- a/lib-python/3/inspect.py +++ b/lib-python/3/inspect.py @@ -1828,7 +1828,7 @@ kwdefaults = getattr(obj, '__kwdefaults__', _void) # ... 
and not None here annotations = getattr(obj, '__annotations__', None) - return (isinstance(code, types.CodeType) and + return (isinstance(code, (types.CodeType, _builtin_code_type)) and isinstance(name, str) and (defaults is None or isinstance(defaults, tuple)) and (kwdefaults is None or isinstance(kwdefaults, dict)) and From pypy.commits at gmail.com Tue Nov 14 11:41:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 08:41:17 -0800 (PST) Subject: [pypy-commit] pypy py3.5: update test for PyPy Message-ID: <5a0b1cad.499edf0a.3360b.c8c9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93015:b05acdc71ad8 Date: 2017-11-14 16:40 +0000 http://bitbucket.org/pypy/pypy/changeset/b05acdc71ad8/ Log: update test for PyPy diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -816,7 +816,9 @@ attrs = attrs_wo_objs(A) - self.assertIn(('__new__', 'method', object), attrs, 'missing __new__') + # changed in PyPy + self.assertIn(('__new__', 'static method', object), attrs, 'missing __new__') + self.assertIn(('__init__', 'method', object), attrs, 'missing __init__') self.assertIn(('s', 'static method', A), attrs, 'missing static method') From pypy.commits at gmail.com Tue Nov 14 12:38:36 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 09:38:36 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Correctly compute .co_kwonlyargcount on BuiltinCode objects Message-ID: <5a0b2a1c.c7c61c0a.61060.6e8c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93016:d129c0d2de48 Date: 2017-11-14 17:38 +0000 http://bitbucket.org/pypy/pypy/changeset/d129c0d2de48/ Log: Correctly compute .co_kwonlyargcount on BuiltinCode objects diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py --- a/pypy/interpreter/test/test_gateway.py +++ b/pypy/interpreter/test/test_gateway.py @@ -18,7 +18,7 @@ class TestBuiltinCode: - def test_signature(self): + def test_signature(self, space): def c(space, w_x, w_y, hello_w): pass code = gateway.BuiltinCode(c, unwrap_spec=[gateway.ObjSpace, @@ -53,6 +53,8 @@ code = gateway.BuiltinCode(f, unwrap_spec=[gateway.ObjSpace, "kwonly", W_Root]) assert code.signature() == Signature([], kwonlyargnames=['x']) + assert space.int_w(space.getattr( + code, space.newtext('co_kwonlyargcount'))) == 1 def test_call(self): diff --git a/pypy/interpreter/typedef.py b/pypy/interpreter/typedef.py --- a/pypy/interpreter/typedef.py +++ b/pypy/interpreter/typedef.py @@ -538,6 +538,9 @@ def fget_co_argcount(space, code): # unwrapping through unwrap_spec return space.newint(code.signature().num_argnames()) +def fget_co_kwonlyargcount(space, code): # unwrapping through unwrap_spec + return space.newint(code.signature().num_kwonlyargnames()) + def fget_zero(space, code): return space.newint(0) @@ -597,7 +600,7 @@ co_name = interp_attrproperty('co_name', cls=BuiltinCode, wrapfn="newtext_or_none"), co_varnames = GetSetProperty(fget_co_varnames, cls=BuiltinCode), co_argcount = GetSetProperty(fget_co_argcount, cls=BuiltinCode), - co_kwonlyargcount = GetSetProperty(fget_zero, cls=BuiltinCode), + co_kwonlyargcount = GetSetProperty(fget_co_kwonlyargcount, cls=BuiltinCode), co_flags = GetSetProperty(fget_co_flags, cls=BuiltinCode), co_consts = GetSetProperty(fget_co_consts, cls=BuiltinCode), ) From pypy.commits at gmail.com Tue Nov 14 13:54:28 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 10:54:28 -0800 (PST) Subject: 
[pypy-commit] pypy py3.5: ignoring test_finddoc seems acceptable Message-ID: <5a0b3be4.08b51c0a.c1328.9f1c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93017:f0a8cba78c24 Date: 2017-11-14 18:54 +0000 http://bitbucket.org/pypy/pypy/changeset/f0a8cba78c24/ Log: ignoring test_finddoc seems acceptable diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -364,6 +364,7 @@ self.assertEqual(inspect.getdoc(mod.FesteringGob.contradiction), 'The automatic gainsaying.') + @cpython_only # XXX: _finddoc() is broken on PyPy, but getdoc() seems OK @unittest.skipIf(MISSING_C_DOCSTRINGS, "test requires docstrings") def test_finddoc(self): finddoc = inspect._finddoc From pypy.commits at gmail.com Tue Nov 14 14:01:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 11:01:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: tweak test Message-ID: <5a0b3da1.05d31c0a.2da46.9bac@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93018:1465066182bd Date: 2017-11-14 19:01 +0000 http://bitbucket.org/pypy/pypy/changeset/1465066182bd/ Log: tweak test diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -2023,7 +2023,7 @@ # This doesn't work now. # (We don't have a valid signature for "type" in 3.4) - with self.assertRaisesRegex(ValueError, "no signature found"): + with self.assertRaisesRegex(ValueError, "signature"): class ThisWorksNow: __call__ = type test_callable(ThisWorksNow()) From pypy.commits at gmail.com Tue Nov 14 14:51:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 11:51:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: PyUnicode_FromObject only works on instances of str Message-ID: <5a0b495b.4fcb1c0a.8a596.3c40@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93019:a3c86e99e3e4 Date: 2017-11-14 19:51 +0000 http://bitbucket.org/pypy/pypy/changeset/a3c86e99e3e4/ Log: PyUnicode_FromObject only works on instances of str diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -319,6 +319,20 @@ assert module.unsafe_len(u'aАbБcСdД') == 8 assert module.unsafe_len(u"café\U0001F4A9") == 5 + def test_FromObject(self): + module = self.import_extension('foo', [ + ("from_object", "METH_O", + """ + return PyUnicode_FromObject(args); + """)]) + class my_str(str): pass + assert module.from_object('abc') == 'abc' + res = module.from_object(my_str('abc')) + assert type(res) is str + assert res == 'abc' + raises(TypeError, module.from_object, b'abc') + raises(TypeError, module.from_object, 42) + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): @@ -500,6 +514,12 @@ assert ret == Py_CLEANUP_SUPPORTED assert space.isinstance_w(from_ref(space, result[0]), space.w_bytes) assert PyUnicode_FSDecoder(space, None, result) == 1 + # Input is invalid + w_input = space.newint(42) + with lltype.scoped_alloc(PyObjectP.TO, 1) as result: + with pytest.raises(OperationError): + PyUnicode_FSConverter(space, w_input, result) + def test_IS(self, space): for char in [0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x1c, 0x1d, 0x1e, 0x1f, diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py --- a/pypy/module/cpyext/unicodeobject.py +++ 
b/pypy/module/cpyext/unicodeobject.py @@ -505,12 +505,19 @@ @cpython_api([PyObject], PyObject) def PyUnicode_FromObject(space, w_obj): - """Shortcut for PyUnicode_FromEncodedObject(obj, NULL, "strict") which is used - throughout the interpreter whenever coercion to Unicode is needed.""" + """Copy an instance of a Unicode subtype to a new true Unicode object if + necessary. If obj is already a true Unicode object (not a subtype), return + the reference with incremented refcount. + + Objects other than Unicode or its subtypes will cause a TypeError. + """ if space.is_w(space.type(w_obj), space.w_unicode): return w_obj + elif space.isinstance_w(w_obj, space.w_unicode): + return space.call_function(space.w_unicode, w_obj) else: - return space.call_function(space.w_unicode, w_obj) + raise oefmt(space.w_TypeError, + "Can't convert '%T' object to str implicitly", w_obj) @cpython_api([PyObject, CONST_STRING, CONST_STRING], PyObject) def PyUnicode_FromEncodedObject(space, w_obj, encoding, errors): From pypy.commits at gmail.com Tue Nov 14 15:13:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 12:13:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: skip or tweak tests Message-ID: <5a0b4e57.8faedf0a.e4890.3f32@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93020:6f9bbe108de5 Date: 2017-11-14 20:12 +0000 http://bitbucket.org/pypy/pypy/changeset/6f9bbe108de5/ Log: skip or tweak tests diff --git a/lib-python/3/test/test_capi.py b/lib-python/3/test/test_capi.py --- a/lib-python/3/test/test_capi.py +++ b/lib-python/3/test/test_capi.py @@ -53,6 +53,8 @@ self.assertEqual(testfunction.attribute, "test") self.assertRaises(AttributeError, setattr, inst.testfunction, "attribute", "test") + @unittest.skipIf(support.check_impl_detail(pypy=True), + "doesn't crash on PyPy") @unittest.skipUnless(threading, 'Threading required for this test.') def test_no_FatalError_infinite_loop(self): with support.SuppressCrashReport(): @@ -205,9 +207,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_null_without_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_null_without_error.* ' - 'returned NULL without setting an error') + 'Function returned a NULL result without setting an exception') def test_return_result_with_error(self): # Issue #23571: A function must not return a result with an error set @@ -237,9 +239,9 @@ else: with self.assertRaises(SystemError) as cm: _testcapi.return_result_with_error() + # PyPy change: different message self.assertRegex(str(cm.exception), - 'return_result_with_error.* ' - 'returned a result with an error set') + 'An exception was set, but function returned a value') def test_buildvalue_N(self): _testcapi.test_buildvalue_N() @@ -327,6 +329,8 @@ self.pendingcalls_wait(l, n) + at unittest.skipIf(support.check_impl_detail(pypy=True), + "subinterpreters not implemented on PyPy") class SubinterpreterTest(unittest.TestCase): def test_subinterps(self): From pypy.commits at gmail.com Tue Nov 14 16:00:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:00:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Unskip _testcapi tests that should pass, skip those that cannot Message-ID: <5a0b5965.d18d1c0a.7250a.a931@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93021:66f524561285 Date: 2017-11-14 20:59 +0000 http://bitbucket.org/pypy/pypy/changeset/66f524561285/ Log: Unskip _testcapi tests that should pass, skip those that cannot diff --git a/lib-python/3/test/test_capi.py 
b/lib-python/3/test/test_capi.py --- a/lib-python/3/test/test_capi.py +++ b/lib-python/3/test/test_capi.py @@ -29,8 +29,9 @@ skips = [] if support.check_impl_detail(pypy=True): skips += [ - 'test_widechar', - ] + 'test_lazy_hash_inheritance', + 'test_capsule', + ] def testfunction(self): """some doc""" diff --git a/lib_pypy/_testcapimodule.c b/lib_pypy/_testcapimodule.c --- a/lib_pypy/_testcapimodule.c +++ b/lib_pypy/_testcapimodule.c @@ -2818,8 +2818,6 @@ return PyMemoryView_FromBuffer(&info); } -#ifndef PYPY_VERSION - static PyObject * test_from_contiguous(PyObject* self, PyObject *noargs) { @@ -2869,7 +2867,6 @@ Py_RETURN_NONE; } -#endif /* PYPY_VERSION */ #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) extern PyTypeObject _PyBytesIOBuffer_Type; @@ -3907,9 +3904,7 @@ {"test_string_to_double", (PyCFunction)test_string_to_double, METH_NOARGS}, {"test_unicode_compare_with_ascii", (PyCFunction)test_unicode_compare_with_ascii, METH_NOARGS}, {"test_capsule", (PyCFunction)test_capsule, METH_NOARGS}, -#ifndef PYPY_VERSION {"test_from_contiguous", (PyCFunction)test_from_contiguous, METH_NOARGS}, -#endif #if (defined(__linux__) || defined(__FreeBSD__)) && defined(__GNUC__) && !defined(PYPY_VERSION) {"test_pep3118_obsolete_write_locks", (PyCFunction)test_pep3118_obsolete_write_locks, METH_NOARGS}, #endif From pypy.commits at gmail.com Tue Nov 14 16:14:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:14:47 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Check for NULL in PyMemoryView_FromBuffer Message-ID: <5a0b5cc7.4eb6df0a.d7dd1.558a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93022:57dc41aeb601 Date: 2017-11-14 21:14 +0000 http://bitbucket.org/pypy/pypy/changeset/57dc41aeb601/ Log: Check for NULL in PyMemoryView_FromBuffer diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -201,6 +201,10 @@ The memoryview object then owns the buffer represented by view, which means you shouldn't try to call PyBuffer_Release() yourself: it will be done on deallocation of the memoryview object.""" + if not view.c_buf: + raise oefmt(space.w_ValueError, + "PyMemoryView_FromBuffer(): info->buf must not be NULL") + # XXX this should allocate a PyMemoryViewObject and # copy view into obj.c_view, without creating a new view.c_obj typedescr = get_typedescr(W_MemoryView.typedef) diff --git a/pypy/module/cpyext/test/test_memoryobject.py b/pypy/module/cpyext/test/test_memoryobject.py --- a/pypy/module/cpyext/test/test_memoryobject.py +++ b/pypy/module/cpyext/test/test_memoryobject.py @@ -255,3 +255,13 @@ """)]) mv = module.new() assert mv.tobytes() == b'hell' + + def test_FromBuffer_NULL(self): + module = self.import_extension('foo', [ + ('new', 'METH_NOARGS', """ + Py_buffer info; + if (PyBuffer_FillInfo(&info, NULL, NULL, 1, 1, PyBUF_FULL_RO) < 0) + return NULL; + return PyMemoryView_FromBuffer(&info); + """)]) + raises(ValueError, module.new) From pypy.commits at gmail.com Tue Nov 14 16:43:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 13:43:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Define SIZEOF_WCHAR_T in pyconfig.h and copy CPython logic for the related Py_UNICODE_XXX defines Message-ID: <5a0b637d.cc091c0a.7ba4e.4202@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93023:69a07b055bdd Date: 2017-11-14 21:43 +0000 http://bitbucket.org/pypy/pypy/changeset/69a07b055bdd/ Log: 
Define SIZEOF_WCHAR_T in pyconfig.h and copy CPython logic for the related Py_UNICODE_XXX defines diff --git a/pypy/module/cpyext/include/pyconfig.h b/pypy/module/cpyext/include/pyconfig.h --- a/pypy/module/cpyext/include/pyconfig.h +++ b/pypy/module/cpyext/include/pyconfig.h @@ -21,10 +21,9 @@ /* PyPy supposes Py_UNICODE == wchar_t */ #define HAVE_USABLE_WCHAR_T 1 #ifndef _WIN32 -#define Py_UNICODE_SIZE 4 -#define Py_UNICODE_WIDE +#define SIZEOF_WCHAR_T 4 #else -#define Py_UNICODE_SIZE 2 +#define SIZEOF_WCHAR_T 2 #endif #ifndef _WIN32 diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -1,6 +1,25 @@ #ifndef Py_UNICODEOBJECT_H #define Py_UNICODEOBJECT_H +#ifndef SIZEOF_WCHAR_T +#error Must define SIZEOF_WCHAR_T +#endif + +#define Py_UNICODE_SIZE SIZEOF_WCHAR_T + +/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. + Otherwise, Unicode strings are stored as UCS-2 (with limited support + for UTF-16) */ + +#if Py_UNICODE_SIZE >= 4 +#define Py_UNICODE_WIDE +#endif + +/* Set these flags if the platform has "wchar.h" and the + wchar_t type is a 16-bit unsigned type */ +/* #define HAVE_WCHAR_H */ +/* #define HAVE_USABLE_WCHAR_T */ + #ifdef __cplusplus extern "C" { #endif diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -333,6 +333,29 @@ raises(TypeError, module.from_object, b'abc') raises(TypeError, module.from_object, 42) + def test_widechar(self): + module = self.import_extension('foo', [ + ("make_wide", "METH_NOARGS", + """ + #if defined(SIZEOF_WCHAR_T) && (SIZEOF_WCHAR_T == 4) + const wchar_t wtext[2] = {(wchar_t)0x10ABCDu}; + size_t wtextlen = 1; + const wchar_t invalid[1] = {(wchar_t)0x110000u}; + #else + const wchar_t wtext[3] = {(wchar_t)0xDBEAu, (wchar_t)0xDFCDu}; + size_t wtextlen = 2; + #endif + return PyUnicode_FromWideChar(wtext, wtextlen); + """), + ("make_utf8", "METH_NOARGS", + """ + return PyUnicode_FromString("\\xf4\\x8a\\xaf\\x8d"); + """)]) + wide = module.make_wide() + utf8 = module.make_utf8() + print(repr(wide), repr(utf8)) + assert wide == utf8 + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): From pypy.commits at gmail.com Tue Nov 14 18:09:33 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 15:09:33 -0800 (PST) Subject: [pypy-commit] pypy py3.5: fix Message-ID: <5a0b77ad.ecb2df0a.b8660.a78e@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93024:bf4ed8da1af1 Date: 2017-11-14 23:09 +0000 http://bitbucket.org/pypy/pypy/changeset/bf4ed8da1af1/ Log: fix diff --git a/pypy/module/cpyext/memoryobject.py b/pypy/module/cpyext/memoryobject.py --- a/pypy/module/cpyext/memoryobject.py +++ b/pypy/module/cpyext/memoryobject.py @@ -1,3 +1,4 @@ +from pypy.interpreter.error import oefmt from pypy.module.cpyext.api import ( cpython_api, CANNOT_FAIL, Py_MAX_FMT, Py_MAX_NDIMS, build_type_checkers, Py_ssize_tP, cts, parse_dir, bootstrap_function, Py_bufferP, slot_function) From pypy.commits at gmail.com Tue Nov 14 18:11:08 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 14 Nov 2017 15:11:08 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Update getargs.c to match CPython 3.5.4 Message-ID: <5a0b780c.cfd51c0a.3b8eb.1012@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93025:00710c6f69a6 Date: 2017-11-14 
23:10 +0000 http://bitbucket.org/pypy/pypy/changeset/00710c6f69a6/ Log: Update getargs.c to match CPython 3.5.4 diff --git a/pypy/module/cpyext/src/getargs.c b/pypy/module/cpyext/src/getargs.c --- a/pypy/module/cpyext/src/getargs.c +++ b/pypy/module/cpyext/src/getargs.c @@ -35,14 +35,16 @@ } freelistentry_t; typedef struct { + freelistentry_t *entries; int first_available; - freelistentry_t *entries; + int entries_malloced; } freelist_t; +#define STATIC_FREELIST_ENTRIES 8 /* Forward */ static int vgetargs1(PyObject *, const char *, va_list *, int); -static void seterror(int, const char *, int *, const char *, const char *); +static void seterror(Py_ssize_t, const char *, int *, const char *, const char *); static char *convertitem(PyObject *, const char **, va_list *, int, int *, char *, size_t, freelist_t *); static char *converttuple(PyObject *, const char **, va_list *, int, @@ -176,7 +178,8 @@ freelist->entries[index].item); } } - PyMem_FREE(freelist->entries); + if (freelist->entries_malloced) + PyMem_FREE(freelist->entries); return retval; } @@ -195,8 +198,13 @@ const char *formatsave = format; Py_ssize_t i, len; char *msg; - freelist_t freelist = {0, NULL}; int compat = flags & FLAG_COMPAT; + freelistentry_t static_entries[STATIC_FREELIST_ENTRIES]; + freelist_t freelist; + + freelist.entries = static_entries; + freelist.first_available = 0; + freelist.entries_malloced = 0; assert(compat || (args != (PyObject*)NULL)); flags = flags & ~FLAG_COMPAT; @@ -229,15 +237,15 @@ message = format; endfmt = 1; break; + case '|': + if (level == 0) + min = max; + break; default: if (level == 0) { - if (c == 'O') - max++; - else if (isalpha(Py_CHARMASK(c))) { + if (isalpha(Py_CHARMASK(c))) if (c != 'e') /* skip encoded */ max++; - } else if (c == '|') - min = max; } break; } @@ -251,30 +259,31 @@ format = formatsave; - freelist.entries = PyMem_NEW(freelistentry_t, max); - if (freelist.entries == NULL) { - PyErr_NoMemory(); - return 0; + if (max > STATIC_FREELIST_ENTRIES) { + freelist.entries = PyMem_NEW(freelistentry_t, max); + if (freelist.entries == NULL) { + PyErr_NoMemory(); + return 0; + } + freelist.entries_malloced = 1; } if (compat) { if (max == 0) { if (args == NULL) return 1; - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.200s%s takes no arguments", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()"); - PyErr_SetString(PyExc_TypeError, msgbuf); + PyErr_Format(PyExc_TypeError, + "%.200s%s takes no arguments", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()"); return cleanreturn(0, &freelist); } else if (min == 1 && max == 1) { if (args == NULL) { - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.200s%s takes at least one argument", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()"); - PyErr_SetString(PyExc_TypeError, msgbuf); + PyErr_Format(PyExc_TypeError, + "%.200s%s takes at least one argument", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()"); return cleanreturn(0, &freelist); } msg = convertitem(args, &format, p_va, flags, levels, @@ -300,20 +309,18 @@ len = PyTuple_GET_SIZE(args); if (len < min || max < len) { - if (message == NULL) { - PyOS_snprintf(msgbuf, sizeof(msgbuf), - "%.150s%s takes %s %d argument%s " - "(%ld given)", - fname==NULL ? "function" : fname, - fname==NULL ? "" : "()", - min==max ? "exactly" - : len < min ? "at least" : "at most", - len < min ? min : max, - (len < min ? min : max) == 1 ? 
"" : "s", - Py_SAFE_DOWNCAST(len, Py_ssize_t, long)); - message = msgbuf; - } - PyErr_SetString(PyExc_TypeError, message); + if (message == NULL) + PyErr_Format(PyExc_TypeError, + "%.150s%s takes %s %d argument%s (%ld given)", + fname==NULL ? "function" : fname, + fname==NULL ? "" : "()", + min==max ? "exactly" + : len < min ? "at least" : "at most", + len < min ? min : max, + (len < min ? min : max) == 1 ? "" : "s", + Py_SAFE_DOWNCAST(len, Py_ssize_t, long)); + else + PyErr_SetString(PyExc_TypeError, message); return cleanreturn(0, &freelist); } @@ -324,7 +331,7 @@ flags, levels, msgbuf, sizeof(msgbuf), &freelist); if (msg) { - seterror(i+1, msg, levels, fname, msg); + seterror(i+1, msg, levels, fname, message); return cleanreturn(0, &freelist); } } @@ -343,7 +350,7 @@ static void -seterror(int iarg, const char *msg, int *levels, const char *fname, +seterror(Py_ssize_t iarg, const char *msg, int *levels, const char *fname, const char *message) { char buf[512]; @@ -359,10 +366,10 @@ } if (iarg != 0) { PyOS_snprintf(p, sizeof(buf) - (p - buf), - "argument %d", iarg); + "argument %" PY_FORMAT_SIZE_T "d", iarg); i = 0; p += strlen(p); - while (levels[i] > 0 && i < 32 && (int)(p-buf) < 220) { + while (i < 32 && levels[i] > 0 && (int)(p-buf) < 220) { PyOS_snprintf(p, sizeof(buf) - (p - buf), ", item %d", levels[i]-1); p += strlen(p); @@ -407,6 +414,7 @@ int n = 0; const char *format = *p_format; int i; + Py_ssize_t len; for (;;) { int c = *format++; @@ -436,12 +444,20 @@ return msgbuf; } - if ((i = PySequence_Size(arg)) != n) { + len = PySequence_Size(arg); + if (len != n) { levels[0] = 0; - PyOS_snprintf(msgbuf, bufsize, - toplevel ? "expected %d arguments, not %d" : - "must be sequence of length %d, not %d", - n, i); + if (toplevel) { + PyOS_snprintf(msgbuf, bufsize, + "expected %d arguments, not %" PY_FORMAT_SIZE_T "d", + n, len); + } + else { + PyOS_snprintf(msgbuf, bufsize, + "must be sequence of length %d, " + "not %" PY_FORMAT_SIZE_T "d", + n, len); + } return msgbuf; } @@ -457,7 +473,6 @@ strncpy(msgbuf, "is not retrievable", bufsize); return msgbuf; } - //PyPy_Borrow(arg, item); msg = convertitem(item, &format, p_va, flags, levels+1, msgbuf, bufsize, freelist); /* PySequence_GetItem calls tp->sq_item, which INCREFs */ @@ -502,9 +517,6 @@ -#define UNICODE_DEFAULT_ENCODING(arg) \ - _PyUnicode_AsDefaultEncodedString(arg, NULL) - /* Format an error message generated by convertsimple(). */ static char * @@ -512,9 +524,15 @@ { assert(expected != NULL); assert(arg != NULL); - PyOS_snprintf(msgbuf, bufsize, - "must be %.50s, not %.50s", expected, - arg == Py_None ? "None" : arg->ob_type->tp_name); + if (expected[0] == '(') { + PyOS_snprintf(msgbuf, bufsize, + "%.100s", expected); + } + else { + PyOS_snprintf(msgbuf, bufsize, + "must be %.50s, not %.50s", expected, + arg == Py_None ? "None" : arg->ob_type->tp_name); + } return msgbuf; } @@ -560,14 +578,14 @@ "size does not fit in an int"); \ return converterr("", arg, msgbuf, bufsize); \ } \ - *q=s; \ + *q = (int)s; \ } #define BUFFER_LEN ((flags & FLAG_SIZE_T) ? 
*q2:*q) #define RETURN_ERR_OCCURRED return msgbuf const char *format = *p_format; char c = *format++; - PyObject *uarg; + char *sarg; switch (c) { @@ -718,7 +736,7 @@ if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongMask(arg); else - return converterr("integer", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); *p = ival; break; } @@ -743,7 +761,7 @@ if (PyLong_Check(arg)) ival = PyLong_AsUnsignedLongLongMask(arg); else - return converterr("integer", arg, msgbuf, bufsize); + return converterr("int", arg, msgbuf, bufsize); *p = ival; break; } @@ -784,6 +802,8 @@ char *p = va_arg(*p_va, char *); if (PyBytes_Check(arg) && PyBytes_Size(arg) == 1) *p = PyBytes_AS_STRING(arg)[0]; + else if (PyByteArray_Check(arg) && PyByteArray_Size(arg) == 1) + *p = PyByteArray_AS_STRING(arg)[0]; else return converterr("a byte string of length 1", arg, msgbuf, bufsize); break; @@ -791,18 +811,40 @@ case 'C': {/* unicode char */ int *p = va_arg(*p_va, int *); - if (PyUnicode_Check(arg) && - PyUnicode_GET_SIZE(arg) == 1) - *p = PyUnicode_AS_UNICODE(arg)[0]; + int kind; + void *data; + + if (!PyUnicode_Check(arg)) + return converterr("a unicode character", arg, msgbuf, bufsize); + + if (PyUnicode_READY(arg)) + RETURN_ERR_OCCURRED; + + if (PyUnicode_GET_LENGTH(arg) != 1) + return converterr("a unicode character", arg, msgbuf, bufsize); + + kind = PyUnicode_KIND(arg); + data = PyUnicode_DATA(arg); + *p = PyUnicode_READ(kind, data, 0); + break; + } + + case 'p': {/* boolean *p*redicate */ + int *p = va_arg(*p_va, int *); + int val = PyObject_IsTrue(arg); + if (val > 0) + *p = 1; + else if (val == 0) + *p = 0; else - return converterr("a unicode character", arg, msgbuf, bufsize); + RETURN_ERR_OCCURRED; break; } /* XXX WAAAAH! 's', 'y', 'z', 'u', 'Z', 'e', 'w' codes all need to be cleaned up! 
*/ - case 'y': {/* any buffer-like object, but not PyUnicode */ + case 'y': {/* any bytes-like object */ void **p = (void **)va_arg(*p_va, char **); char *buf; Py_ssize_t count; @@ -825,16 +867,16 @@ STORE_SIZE(count); format++; } else { - if (strlen(*p) != count) - return converterr( - "bytes without null bytes", - arg, msgbuf, bufsize); + if (strlen(*p) != (size_t)count) { + PyErr_SetString(PyExc_ValueError, "embedded null byte"); + RETURN_ERR_OCCURRED; + } } break; } - case 's': /* text string */ - case 'z': /* text string or None */ + case 's': /* text string or bytes-like object */ + case 'z': /* text string, bytes-like object or None */ { if (*format == '*') { /* "s*" or "z*" */ @@ -843,15 +885,14 @@ if (c == 'z' && arg == Py_None) PyBuffer_FillInfo(p, NULL, NULL, 0, 1, 0); else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + Py_ssize_t len; + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - PyBuffer_FillInfo(p, arg, - PyBytes_AS_STRING(uarg), PyBytes_GET_SIZE(uarg), - 1, 0); + PyBuffer_FillInfo(p, arg, sarg, len, 1, 0); } - else { /* any buffer-like object */ + else { /* any bytes-like object */ char *buf; if (getbuffer(arg, p, &buf) < 0) return converterr(buf, arg, msgbuf, bufsize); @@ -862,7 +903,7 @@ arg, msgbuf, bufsize); } format++; - } else if (*format == '#') { /* any buffer-like object */ + } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ void **p = (void **)va_arg(*p_va, char **); FETCH_SIZE; @@ -872,14 +913,15 @@ STORE_SIZE(0); } else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + Py_ssize_t len; + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - *p = PyBytes_AS_STRING(uarg); - STORE_SIZE(PyBytes_GET_SIZE(uarg)); + *p = sarg; + STORE_SIZE(len); } - else { /* any buffer-like object */ + else { /* read-only bytes-like object */ /* XXX Really? */ char *buf; Py_ssize_t count = convertbuffer(arg, p, &buf); @@ -891,26 +933,25 @@ } else { /* "s" or "z" */ char **p = va_arg(*p_va, char **); - uarg = NULL; + Py_ssize_t len; + sarg = NULL; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { - uarg = UNICODE_DEFAULT_ENCODING(arg); - if (uarg == NULL) + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); - *p = PyBytes_AS_STRING(uarg); + if (strlen(sarg) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + RETURN_ERR_OCCURRED; + } + *p = sarg; } else return converterr(c == 'z' ? "str or None" : "str", arg, msgbuf, bufsize); - if (*p != NULL && uarg != NULL && - (Py_ssize_t) strlen(*p) != PyBytes_GET_SIZE(uarg)) - return converterr( - c == 'z' ? 
"str without null bytes or None" - : "str without null bytes", - arg, msgbuf, bufsize); } break; } @@ -918,9 +959,10 @@ case 'u': /* raw unicode buffer (Py_UNICODE *) */ case 'Z': /* raw unicode buffer or None */ { - if (*format == '#') { /* any buffer-like object */ - /* "s#" or "Z#" */ - Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); + + if (*format == '#') { + /* "u#" or "Z#" */ FETCH_SIZE; if (c == 'Z' && arg == Py_None) { @@ -928,24 +970,29 @@ STORE_SIZE(0); } else if (PyUnicode_Check(arg)) { - *p = PyUnicode_AS_UNICODE(arg); - STORE_SIZE(PyUnicode_GET_SIZE(arg)); + Py_ssize_t len; + *p = PyUnicode_AsUnicodeAndSize(arg, &len); + if (*p == NULL) + RETURN_ERR_OCCURRED; + STORE_SIZE(len); } else - return converterr("str or None", arg, msgbuf, bufsize); + return converterr(c == 'Z' ? "str or None" : "str", + arg, msgbuf, bufsize); format++; } else { - /* "s" or "Z" */ - Py_UNICODE **p = va_arg(*p_va, Py_UNICODE **); - + /* "u" or "Z" */ if (c == 'Z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { - *p = PyUnicode_AS_UNICODE(arg); - if (Py_UNICODE_strlen(*p) != PyUnicode_GET_SIZE(arg)) - return converterr( - "str without null character or None", - arg, msgbuf, bufsize); + Py_ssize_t len; + *p = PyUnicode_AsUnicodeAndSize(arg, &len); + if (*p == NULL) + RETURN_ERR_OCCURRED; + if (Py_UNICODE_strlen(*p) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + RETURN_ERR_OCCURRED; + } } else return converterr(c == 'Z' ? "str or None" : "str", arg, msgbuf, bufsize); @@ -1071,9 +1118,11 @@ } else { if (size + 1 > BUFFER_LEN) { Py_DECREF(s); - return converterr( - "(buffer overflow)", - arg, msgbuf, bufsize); + PyErr_Format(PyExc_TypeError, + "encoded string too long " + "(%zd, maximum length %zd)", + (Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1)); + RETURN_ERR_OCCURRED; } } memcpy(*buffer, ptr, size+1); @@ -1095,7 +1144,7 @@ if ((Py_ssize_t)strlen(ptr) != size) { Py_DECREF(s); return converterr( - "encoded string without NULL bytes", + "encoded string without null bytes", arg, msgbuf, bufsize); } *buffer = PyMem_NEW(char, size + 1); @@ -1135,8 +1184,11 @@ case 'U': { /* PyUnicode object */ PyObject **p = va_arg(*p_va, PyObject **); - if (PyUnicode_Check(arg)) + if (PyUnicode_Check(arg)) { + if (PyUnicode_READY(arg) == -1) + RETURN_ERR_OCCURRED; *p = arg; + } else return converterr("str", arg, msgbuf, bufsize); break; @@ -1182,7 +1234,7 @@ if (*format != '*') return converterr( - "invalid use of 'w' format character", + "(invalid use of 'w' format character)", arg, msgbuf, bufsize); format++; @@ -1190,7 +1242,8 @@ supports it directly. 
*/ if (PyObject_GetBuffer(arg, (Py_buffer*)p, PyBUF_WRITABLE) < 0) { PyErr_Clear(); - return converterr("read-write buffer", arg, msgbuf, bufsize); + return converterr("read-write bytes-like object", + arg, msgbuf, bufsize); } if (!PyBuffer_IsContiguous((Py_buffer*)p, 'C')) { PyBuffer_Release((Py_buffer*)p); @@ -1205,7 +1258,7 @@ } default: - return converterr("impossible", arg, msgbuf, bufsize); + return converterr("(impossible)", arg, msgbuf, bufsize); } @@ -1228,7 +1281,7 @@ *errmsg = NULL; *p = NULL; if (pb != NULL && pb->bf_releasebuffer != NULL) { - *errmsg = "read-only pinned buffer"; + *errmsg = "read-only bytes-like object"; return -1; } @@ -1244,7 +1297,7 @@ getbuffer(PyObject *arg, Py_buffer *view, char **errmsg) { if (PyObject_GetBuffer(arg, view, PyBUF_SIMPLE) != 0) { - *errmsg = "bytes or buffer"; + *errmsg = "bytes-like object"; return -1; } if (!PyBuffer_IsContiguous(view, 'C')) { @@ -1383,9 +1436,16 @@ int levels[32]; const char *fname, *msg, *custom_msg, *keyword; int min = INT_MAX; - int i, len, nargs, nkeywords; + int max = INT_MAX; + int i, len; + Py_ssize_t nargs, nkeywords; PyObject *current_arg; - freelist_t freelist = {0, NULL}; + freelistentry_t static_entries[STATIC_FREELIST_ENTRIES]; + freelist_t freelist; + + freelist.entries = static_entries; + freelist.first_available = 0; + freelist.entries_malloced = 0; assert(args != NULL && PyTuple_Check(args)); assert(keywords == NULL || PyDict_Check(keywords)); @@ -1409,17 +1469,20 @@ for (len=0; kwlist[len]; len++) continue; - freelist.entries = PyMem_NEW(freelistentry_t, len); - if (freelist.entries == NULL) { - PyErr_NoMemory(); - return 0; + if (len > STATIC_FREELIST_ENTRIES) { + freelist.entries = PyMem_NEW(freelistentry_t, len); + if (freelist.entries == NULL) { + PyErr_NoMemory(); + return 0; + } + freelist.entries_malloced = 1; } nargs = PyTuple_GET_SIZE(args); nkeywords = (keywords == NULL) ? 0 : PyDict_Size(keywords); if (nargs + nkeywords > len) { - PyErr_Format(PyExc_TypeError, "%s%s takes at most %d " - "argument%s (%d given)", + PyErr_Format(PyExc_TypeError, + "%s%s takes at most %d argument%s (%zd given)", (fname == NULL) ? "function" : fname, (fname == NULL) ? "" : "()", len, @@ -1432,8 +1495,39 @@ for (i = 0; i < len; i++) { keyword = kwlist[i]; if (*format == '|') { + if (min != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string (| specified twice)"); + return cleanreturn(0, &freelist); + } + min = i; format++; + + if (max != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string ($ before |)"); + return cleanreturn(0, &freelist); + } + } + if (*format == '$') { + if (max != INT_MAX) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid format string ($ specified twice)"); + return cleanreturn(0, &freelist); + } + + max = i; + format++; + + if (max < nargs) { + PyErr_Format(PyExc_TypeError, + "Function takes %s %d positional arguments" + " (%d given)", + (min != INT_MAX) ? 
"at most" : "exactly", + max, nargs); + return cleanreturn(0, &freelist); + } } if (IS_END_OF_FORMAT(*format)) { PyErr_Format(PyExc_RuntimeError, @@ -1494,7 +1588,7 @@ } } - if (!IS_END_OF_FORMAT(*format) && *format != '|') { + if (!IS_END_OF_FORMAT(*format) && (*format != '|') && (*format != '$')) { PyErr_Format(PyExc_RuntimeError, "more argument specifiers than keyword list entries " "(remaining format:'%s')", format); @@ -1507,12 +1601,13 @@ Py_ssize_t pos = 0; while (PyDict_Next(keywords, &pos, &key, &value)) { int match = 0; - char *ks; + char* ks; if (!PyUnicode_Check(key)) { PyErr_SetString(PyExc_TypeError, "keywords must be strings"); return cleanreturn(0, &freelist); } + /* check that _PyUnicode_AsString() result is not NULL */ ks = _PyUnicode_AsString(key); if (ks != NULL) { @@ -1545,8 +1640,10 @@ switch (c) { - /* simple codes - * The individual types (second arg of va_arg) are irrelevant */ + /* + * codes that take a single data pointer as an argument + * (the type of the pointer is irrelevant) + */ case 'b': /* byte -- very short int */ case 'B': /* byte as bitfield */ @@ -1560,22 +1657,21 @@ case 'L': /* PY_LONG_LONG */ case 'K': /* PY_LONG_LONG sized bitfield */ #endif + case 'n': /* Py_ssize_t */ case 'f': /* float */ case 'd': /* double */ case 'D': /* complex double */ case 'c': /* char */ case 'C': /* unicode char */ + case 'p': /* boolean predicate */ + case 'S': /* string object */ + case 'Y': /* string object */ + case 'U': /* unicode string object */ { (void) va_arg(*p_va, void *); break; } - case 'n': /* Py_ssize_t */ - { - (void) va_arg(*p_va, Py_ssize_t *); - break; - } - /* string codes */ case 'e': /* string with encoding */ @@ -1608,16 +1704,6 @@ break; } - /* object codes */ - - case 'S': /* string object */ - case 'Y': /* string object */ - case 'U': /* unicode string object */ - { - (void) va_arg(*p_va, PyObject **); - break; - } - case 'O': /* object */ { if (*format == '!') { @@ -1731,7 +1817,7 @@ /* For type constructors that don't take keyword args * - * Sets a TypeError and returns 0 if the kwds dict is + * Sets a TypeError and returns 0 if the args/kwargs is * not empty, returns 1 otherwise */ int @@ -1750,6 +1836,25 @@ funcname); return 0; } + + +int +_PyArg_NoPositional(const char *funcname, PyObject *args) +{ + if (args == NULL) + return 1; + if (!PyTuple_CheckExact(args)) { + PyErr_BadInternalCall(); + return 0; + } + if (PyTuple_GET_SIZE(args) == 0) + return 1; + + PyErr_Format(PyExc_TypeError, "%s does not take positional arguments", + funcname); + return 0; +} + #ifdef __cplusplus }; #endif From pypy.commits at gmail.com Tue Nov 14 20:14:11 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:11 -0800 (PST) Subject: [pypy-commit] pypy vmprof-enable-kwargs: a branch where to implement the same _vmprof logic as in vmprof's enable-kwargs branch Message-ID: <5a0b94e3.08b51c0a.c1328.e8cf@mx.google.com> Author: Antonio Cuni Branch: vmprof-enable-kwargs Changeset: r93026:ef1507afe550 Date: 2017-11-10 16:42 +0100 http://bitbucket.org/pypy/pypy/changeset/ef1507afe550/ Log: a branch where to implement the same _vmprof logic as in vmprof's enable-kwargs branch From pypy.commits at gmail.com Tue Nov 14 20:14:13 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:13 -0800 (PST) Subject: [pypy-commit] pypy default: improve the test by also checking the full call-stack at various points. 
Probably it does not test anything more than before, but it is a good aid when you read it Message-ID: <5a0b94e5.52c6df0a.2c65f.64fb@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93027:b7758cca88a3 Date: 2017-11-13 18:40 +0100 http://bitbucket.org/pypy/pypy/changeset/b7758cca88a3/ Log: improve the test by also checking the full call-stack at various points. Probably it does not test anything more than before, but it is a good aid when you read it diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -340,16 +340,41 @@ import sys from _continuation import continulet # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +383,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() From pypy.commits at gmail.com Tue Nov 14 20:14:20 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:20 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: WIP: refactor test_xxx into a proper failing tests, with a decent name and real asserts Message-ID: <5a0b94ec.09a0df0a.5b3a0.1732@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93030:3e4c6ca55d1d Date: 2017-11-15 01:54 +0100 http://bitbucket.org/pypy/pypy/changeset/3e4c6ca55d1d/ Log: WIP: refactor test_xxx into a proper failing tests, with a decent name and real asserts diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,28 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle (shouldn't happen) + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) def test_new_empty(self): from _continuation import continulet @@ -336,70 +358,31 
@@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] - def test_xxx(self): + def test_f_back_no_cycles(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() - while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - print res - return res - def bar(c): f = sys._getframe(0) - print 'bar 1' + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] c.switch(f) - print 'bar 2' + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] def foo(c): bar(c) - - print + # c = continulet(foo) - print 'test 1' + assert stack() == ['test_f_back_no_cycles'] f = c.switch() - print 'test 2' - xxx = c.switch() - print 'xxx', xxx - #stack() - #stack(f) + assert stack() == ['test_f_back_no_cycles'] + assert stack(f) == ['bar', 'foo'] + c.switch() def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() - while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - #print res - return res - def bar(c): assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) @@ -436,6 +419,7 @@ f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main2', 'test_f_back'] assert stack(f1_bar) == ['bar', 'foo', '...'] # main() From pypy.commits at gmail.com Tue Nov 14 20:14:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:16 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: temporary checkin with some debugging stuff + a new logic to avoid building cycles of frames Message-ID: <5a0b94e8.c380df0a.2925d.3942@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93028:f7a1a6eb6908 Date: 2017-11-14 11:53 +0100 http://bitbucket.org/pypy/pypy/changeset/f7a1a6eb6908/ Log: temporary checkin with some debugging stuff + a new logic to avoid building cycles of frames diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -41,13 +41,16 @@ bottomframe.locals_cells_stack_w[3] = w_kwds bottomframe.last_exception = get_cleared_operation_error(space) self.bottomframe = bottomframe + self.topframe = sthread.ec.topframeref # XXX? 
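The tests in these changesets keep re-deriving the caller's call chain by hand. A minimal, self-contained sketch of the frame-walking pattern they rely on, using only sys._getframe() and f_back; the helper and demo function names below are illustrative, not part of any changeset:

import sys

def call_stack(f=None):
    # Collect co_name along the f_back chain of the given frame
    # (default: the caller's frame). Guard against f_back cycles:
    # plain calls never produce one, but paused continulets did
    # before the continulet-no-frame-loop branch.
    if f is None:
        f = sys._getframe(1)
    names = []
    seen = set()
    while f is not None:
        if f in seen:
            names.append('...')
            break
        seen.add(f)
        names.append(f.f_code.co_name)
        f = f.f_back
    return names

def inner():
    return call_stack()

def outer():
    return inner()

print(outer())    # ['inner', 'outer', '<module>'] when run as a script

The real tests wrap the same idea in the w_stack fixture shown above, so every assertion can compare against an explicit list of frame names.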
# global_state.origin = self self.sthread = sthread + pstack(self, 'descr_init') h = sthread.new(new_stacklet_callback) post_switch(sthread, h) def switch(self, w_to): + #import pdb;pdb.set_trace() sthread = self.sthread to = self.space.interp_w(W_Continulet, w_to, can_be_None=True) if to is not None and to.sthread is None: @@ -76,9 +79,11 @@ global_state.origin = self if to is None: # simple switch: going to self.h + #print 'simple switch' global_state.destination = self else: # double switch: the final destination is to.h + #print 'double switch' global_state.destination = to # h = sthread.switch(global_state.destination.h) @@ -217,6 +222,23 @@ global_state.clear() +def pstack(cont, message=''): + return + if message: + print message + if isinstance(cont, jit.DirectJitVRef): + f = cont() + else: + f = cont.bottomframe + i = 0 + while f: + print ' ', f.pycode.co_name + f = f.f_backref() + i += 1 + if i == 10: + break + print + def new_stacklet_callback(h, arg): self = global_state.origin self.h = h @@ -225,6 +247,7 @@ frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: + #import pdb;pdb.xpm() global_state.propagate_exception = e else: global_state.w_value = w_result @@ -236,15 +259,32 @@ def post_switch(sthread, h): origin = global_state.origin self = global_state.destination + #import pdb;pdb.set_trace() global_state.origin = None global_state.destination = None self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - sthread.ec.topframeref = self.bottomframe.f_backref - self.bottomframe.f_backref = origin.bottomframe.f_backref - origin.bottomframe.f_backref = current + print '==== SWITCH ====' + pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') + pstack(self, 'self') + + # ORGINAL + ## sthread.ec.topframeref = self.bottomframe.f_backref + ## self.bottomframe.f_backref = origin.bottomframe.f_backref + ## origin.bottomframe.f_backref = current + + # antocuni + sthread.ec.topframeref = self.topframe + self.topframe = origin.topframe + origin.topframe = current + # + print 'swap' + pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') + pstack(self, 'self') + print '==== END SWITCH ====' + print return get_result() def get_result(): diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -336,6 +336,47 @@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + def test_xxx(self): + import sys + from _continuation import continulet + # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + print res + return res + + def bar(c): + f = sys._getframe(0) + print 'bar 1' + c.switch(f) + print 'bar 2' + def foo(c): + bar(c) + + print + c = continulet(foo) + print 'test 1' + f = c.switch() + print 'test 2' + xxx = c.switch() + print 'xxx', xxx + #stack() + #stack(f) + def test_f_back(self): import sys from _continuation import continulet From pypy.commits at gmail.com Tue Nov 14 20:14:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:22 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: add a passing test to check that we stick the continulet stack at the right position of the f_back chain 
Message-ID: <5a0b94ee.52c6df0a.2c65f.6509@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93031:82b54bb1e271 Date: 2017-11-15 02:01 +0100 http://bitbucket.org/pypy/pypy/changeset/82b54bb1e271/ Log: add a passing test to check that we stick the continulet stack at the right position of the f_back chain diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -378,6 +378,34 @@ assert stack(f) == ['bar', 'foo'] c.switch() + def test_f_back_proper_chaining(self): + import sys + from _continuation import continulet + stack = self.stack + # + seen = [] + def bar(c): + seen.append(2) + assert stack() == ['bar', 'foo', 'test_f_back_proper_chaining'] + c.switch() + seen.append(5) + assert stack() == ['bar', 'foo', 'main', + 'test_f_back_proper_chaining'] + def foo(c): + bar(c) + def main(c): + seen.append(4) + assert stack() == ['main', 'test_f_back_proper_chaining'] + c.switch() + seen.append(6) + + c = continulet(foo) + seen.append(1) + c.switch() + seen.append(3) + f = main(c) + assert seen == [1, 2, 3, 4, 5, 6] + def test_f_back(self): import sys from _continuation import continulet From pypy.commits at gmail.com Tue Nov 14 20:14:18 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:18 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: WIP: add two temporary flags which allow to select: 1) the old switch logic vs the new one and 2) whether to print or not debug infos Message-ID: <5a0b94ea.26acdf0a.6d561.e079@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93029:ba1ff85734b9 Date: 2017-11-15 01:43 +0100 http://bitbucket.org/pypy/pypy/changeset/ba1ff85734b9/ Log: WIP: add two temporary flags which allow to select: 1) the old switch logic vs the new one and 2) whether to print or not debug infos diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -41,7 +41,7 @@ bottomframe.locals_cells_stack_w[3] = w_kwds bottomframe.last_exception = get_cleared_operation_error(space) self.bottomframe = bottomframe - self.topframe = sthread.ec.topframeref # XXX? + self.topframeref = sthread.ec.topframeref # global_state.origin = self self.sthread = sthread @@ -79,11 +79,9 @@ global_state.origin = self if to is None: # simple switch: going to self.h - #print 'simple switch' global_state.destination = self else: # double switch: the final destination is to.h - #print 'double switch' global_state.destination = to # h = sthread.switch(global_state.destination.h) @@ -221,24 +219,6 @@ global_state = GlobalState() global_state.clear() - -def pstack(cont, message=''): - return - if message: - print message - if isinstance(cont, jit.DirectJitVRef): - f = cont() - else: - f = cont.bottomframe - i = 0 - while f: - print ' ', f.pycode.co_name - f = f.f_backref() - i += 1 - if i == 10: - break - print - def new_stacklet_callback(h, arg): self = global_state.origin self.h = h @@ -256,35 +236,66 @@ global_state.destination = self return self.h +DEBUG = False +ORIGINAL = False + +def log(x=''): + if DEBUG: + print x + +def pstack(cont, message=''): + """ + NOTE: I don't know exactly why, but sometimes if you pstack() a sthread or + a frame, then later you get an InvalidVirtualRef exception. 
So, in + general, this is a useful debugging tool but don't expect your tests to + pass if you call it. Put DEBUG=False to disable. + """ + if not DEBUG: + return + if message: + print message + if isinstance(cont, jit.DirectJitVRef): + f = cont() + else: + f = cont.bottomframe + i = 0 + while f: + print ' ', f.pycode.co_name + f = f.f_backref() + i += 1 + if i == 10: + break + print + def post_switch(sthread, h): origin = global_state.origin self = global_state.destination - #import pdb;pdb.set_trace() global_state.origin = None global_state.destination = None self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - print '==== SWITCH ====' + lo + g('==== SWITCH ====') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') pstack(self, 'self') - # ORGINAL - ## sthread.ec.topframeref = self.bottomframe.f_backref - ## self.bottomframe.f_backref = origin.bottomframe.f_backref - ## origin.bottomframe.f_backref = current - - # antocuni - sthread.ec.topframeref = self.topframe - self.topframe = origin.topframe - origin.topframe = current - + if ORIGINAL: + sthread.ec.topframeref = self.bottomframe.f_backref + self.bottomframe.f_backref = origin.bottomframe.f_backref + origin.bottomframe.f_backref = current + else: + # antocuni + sthread.ec.topframeref = self.topframeref + self.topframeref = origin.topframeref + origin.topframeref = current # - print 'swap' + log('swap') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') - pstack(self, 'self') - print '==== END SWITCH ====' - print + pstack(self + , 'self') + log('==== END SWITCH ====') + log() return get_result() def get_result(): @@ -315,7 +326,8 @@ if cont.sthread is None: continue # ignore non-initialized continulets else: - raise geterror(space, "inter-thread support is missing") + raise geterror(space + , "inter-thread support is missing") elif sthread.is_empty_handle(cont.h): raise geterror(space, "got an already-finished continulet") contlist.append(cont) From pypy.commits at gmail.com Tue Nov 14 20:14:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:24 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop: introduce the concept of running/paused continulet, depending on bottomframe.f_backref; fix the post_switch() logic to build the f_back chain correctly; finally fix test_f_back to check that we do NOT build cycles of frames Message-ID: <5a0b94f0.ddb1df0a.34cbb.596d@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop Changeset: r93032:d5212118820d Date: 2017-11-15 02:11 +0100 http://bitbucket.org/pypy/pypy/changeset/d5212118820d/ Log: introduce the concept of running/paused continulet, depending on bottomframe.f_backref; fix the post_switch() logic to build the f_back chain correctly; finally fix test_f_back to check that we do NOT build cycles of frames diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -17,6 +17,8 @@ # states: # - not init'ed: self.sthread == None # - normal: self.sthread != None, not is_empty_handle(self.h) + # * running: self.bottomframe.f_backref is not vref_None + # * paused: self.bottomframe.f_backref is vref_None # - finished: self.sthread != None, is_empty_handle(self.h) def check_sthread(self): @@ -275,8 +277,7 @@ self.h, origin.h = origin.h, h # current = sthread.ec.topframeref - lo - g('==== SWITCH ====') + log('==== SWITCH ====') 
pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') pstack(self, 'self') @@ -285,15 +286,20 @@ self.bottomframe.f_backref = origin.bottomframe.f_backref origin.bottomframe.f_backref = current else: - # antocuni sthread.ec.topframeref = self.topframeref self.topframeref = origin.topframeref + self.bottomframe.f_backref = origin.bottomframe.f_backref origin.topframeref = current + if origin.bottomframe.f_backref is jit.vref_None: + # paused ==> running: build the f_back link + origin.bottomframe.f_backref = current + else: + # running ==> paused: break the f_back link + origin.bottomframe.f_backref = jit.vref_None # log('swap') pstack(sthread.ec.topframeref, 'sthread.ec.topframeref') - pstack(self - , 'self') + pstack(self, 'self') log('==== END SWITCH ====') log() return get_result() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -406,27 +406,27 @@ f = main(c) assert seen == [1, 2, 3, 4, 5, 6] - def test_f_back(self): + def test_f_back_complex(self): import sys from _continuation import continulet stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back'] + assert stack() == ['bar', 'foo', 'test_f_back_complex'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] c.switch(sys._getframe(1).f_back) # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # - assert stack() == ['test_f_back'] + assert stack() == ['test_f_back_complex'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -439,16 +439,16 @@ def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main2', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main2', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # main() main2() From pypy.commits at gmail.com Tue Nov 14 20:14:27 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 14 Nov 2017 17:14:27 -0800 (PST) Subject: [pypy-commit] pypy default: merge heads Message-ID: <5a0b94f3.42da1c0a.3b8a9.9a94@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93033:821e59360f37 Date: 2017-11-15 02:13 +0100 http://bitbucket.org/pypy/pypy/changeset/821e59360f37/ Log: merge heads diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -340,16 +340,41 @@ import sys from _continuation import continulet # + def stack(f=None): + """ + get the call-stack of the caller or the specified frame + """ + if f is None: + f = 
sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame loop + res.append('...') + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +383,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() From pypy.commits at gmail.com Wed Nov 15 10:07:19 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:19 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: make sure that vmprof don't sample the stack in the middle of stacklet switching, else it reads nonsense and BOOM Message-ID: <5a0c5827.0e97df0a.8f39d.3c79@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93034:3e4e9ff62be1 Date: 2017-11-15 12:59 +0100 http://bitbucket.org/pypy/pypy/changeset/3e4e9ff62be1/ Log: make sure that vmprof don't sample the stack in the middle of stacklet switching, else it reads nonsense and BOOM diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling(space=None) # XXX x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling(space=None) # XXX if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(MyCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def 
entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Wed Nov 15 10:07:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:22 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: fix typo Message-ID: <5a0c582a.f2a9df0a.850f5.445e@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93035:1b1bdd6c7f39 Date: 2017-11-15 12:05 +0000 http://bitbucket.org/pypy/pypy/changeset/1b1bdd6c7f39/ Log: fix typo diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -280,7 +280,7 @@ # rvmprof.c does not compile correctly class FakeVMProfCode(object): pass -rvmprof.register_code_object_class(MyCode, lambda code: 'name') +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') @rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) def fake_vmprof_main(code, num): return 42 From pypy.commits at gmail.com Wed Nov 15 10:07:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:24 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: add a test which is failing (i.e., segfaulting) on default, and that it seems to be fixed on this branch Message-ID: <5a0c582c.01ed1c0a.a3420.6656@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93036:52a6650ba479 Date: 2017-11-15 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/52a6650ba479/ Log: add a test which is failing (i.e., segfaulting) on default, and that it seems to be fixed on this branch diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): @@ -124,7 +152,8 @@ try: import thread except ImportError: - py.test.skip("no threads") + py.test.ski + p("no threads") ts = [ThreadTest(thread.allocate_lock()) for i in range(5)] for t in ts: thread.start_new_thread(t.run, ()) From pypy.commits at gmail.com Wed Nov 15 10:07:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:26 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: remove the space param from rvmprof.{start, stop}_sampling: it is not used and also it does not make sense to have it inside rlib Message-ID: <5a0c582e.099fdf0a.c3df7.e37c@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93037:46ee55287ed4 Date: 2017-11-15 16:03 +0100 http://bitbucket.org/pypy/pypy/changeset/46ee55287ed4/ Log: remove the space param from rvmprof.{start,stop}_sampling: it is not used and also it does not make sense to have it inside rlib diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -41,13 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling(space=None) # XXX + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling(space=None) # XXX + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() From pypy.commits at gmail.com Wed Nov 15 10:07:27 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:27 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch: close merged branch Message-ID: <5a0c582f.7a86df0a.a4821.7ca4@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch Changeset: r93038:2c8aa32187c0 Date: 2017-11-15 16:04 +0100 http://bitbucket.org/pypy/pypy/changeset/2c8aa32187c0/ Log: close merged branch From pypy.commits at gmail.com Wed Nov 15 10:07:30 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 07:07:30 -0800 (PST) Subject: [pypy-commit] pypy default: merge the fix-vmprof-stacklet-switch: make sure that vmprof does not segfault in presence of continuation.switch (and thus with greenlets, eventlet, etc.) 
Message-ID: <5a0c5832.45c1df0a.c8bc7.467c@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93039:927cc69f4d52 Date: 2017-11-15 16:06 +0100 http://bitbucket.org/pypy/pypy/changeset/927cc69f4d52/ Log: merge the fix-vmprof-stacklet-switch: make sure that vmprof does not segfault in presence of continuation.switch (and thus with greenlets, eventlet, etc.) diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. + """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib 
import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Wed Nov 15 10:17:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 07:17:22 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Merged in thisch/pypy/py3.5 (pull request #584) Message-ID: <5a0c5a82.c6a2df0a.13ea3.d96c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93042:dfff7758834e Date: 2017-11-15 15:16 +0000 http://bitbucket.org/pypy/pypy/changeset/dfff7758834e/ Log: Merged in thisch/pypy/py3.5 (pull request #584) Change return type of os.times to posix.times_result diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -18,6 +18,7 @@ 'error': 'app_posix.error', 'stat_result': 'app_posix.stat_result', 'statvfs_result': 'app_posix.statvfs_result', + 'times_result': 'app_posix.times_result', 'uname_result': 'app_posix.uname_result', 'urandom': 'app_posix.urandom', 'terminal_size': 'app_posix.terminal_size', diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py --- a/pypy/module/posix/app_posix.py +++ b/pypy/module/posix/app_posix.py @@ -122,6 +122,19 @@ else: _validate_fd = validate_fd + +class times_result(metaclass=structseqtype): + + name = "posix.times_result" + __module__ = "posix" + + user = structseqfield(0, "user time") + system = structseqfield(1, "system time") + children_user = structseqfield(2, "user time of children") + children_system = structseqfield(3, "system time of children") + elapsed = structseqfield(4, "elapsed time since an arbitray point in the past") + + if osname == 'posix': def wait(): """ wait() -> (pid, status) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -684,11 +684,17 @@ except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) else: - return space.newtuple([space.newfloat(times[0]), - space.newfloat(times[1]), - space.newfloat(times[2]), - space.newfloat(times[3]), - space.newfloat(times[4])]) + w_keywords = space.newdict() + w_tuple = space.newtuple([space.newfloat(times[0]), + space.newfloat(times[1]), + space.newfloat(times[2]), + space.newfloat(times[3]), + space.newfloat(times[4])]) + + w_times_result = space.getattr(space.getbuiltinmodule(os.name), + space.newtext('times_result')) + return space.call_function(w_times_result, w_tuple, w_keywords) + @unwrap_spec(command='fsencode') def system(space, command): diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -378,16 +378,21 @@ def test_times(self): """ - posix.times() should return a five-tuple giving float-representations - (seconds, effectively) of the four fields from the underlying struct - 
tms and the return value. + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from + the underlying struct tms and the return value. """ result = self.posix.times() - assert isinstance(result, tuple) + assert isinstance(self.posix.times(), self.posix.times_result) + assert isinstance(self.posix.times(), tuple) assert len(result) == 5 for value in result: assert isinstance(value, float) - + assert isinstance(result.user, float) + assert isinstance(result.system, float) + assert isinstance(result.children_user, float) + assert isinstance(result.children_system, float) + assert isinstance(result.elapsed, float) def test_strerror(self): assert isinstance(self.posix.strerror(0), str) From pypy.commits at gmail.com Wed Nov 15 10:17:32 2017 From: pypy.commits at gmail.com (thisch) Date: Wed, 15 Nov 2017 07:17:32 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Change return type of os.times to posix.times_result Message-ID: <5a0c5a8c.44841c0a.152ca.eb1b@mx.google.com> Author: Thomas Hisch Branch: py3.5 Changeset: r93040:852c26ea2a1c Date: 2017-11-12 21:49 +0100 http://bitbucket.org/pypy/pypy/changeset/852c26ea2a1c/ Log: Change return type of os.times to posix.times_result The return type was changed in CPython3.3. Related: #2375 diff --git a/pypy/module/posix/__init__.py b/pypy/module/posix/__init__.py --- a/pypy/module/posix/__init__.py +++ b/pypy/module/posix/__init__.py @@ -18,6 +18,7 @@ 'error': 'app_posix.error', 'stat_result': 'app_posix.stat_result', 'statvfs_result': 'app_posix.statvfs_result', + 'times_result': 'app_posix.times_result', 'uname_result': 'app_posix.uname_result', 'urandom': 'app_posix.urandom', 'terminal_size': 'app_posix.terminal_size', diff --git a/pypy/module/posix/app_posix.py b/pypy/module/posix/app_posix.py --- a/pypy/module/posix/app_posix.py +++ b/pypy/module/posix/app_posix.py @@ -122,6 +122,19 @@ else: _validate_fd = validate_fd + +class times_result(metaclass=structseqtype): + + name = "posix.times_result" + __module__ = "posix" + + user = structseqfield(0, "user time") + system = structseqfield(1, "system time") + children_user = structseqfield(2, "user time of children") + children_system = structseqfield(3, "system time of children") + elapsed = structseqfield(4, "elapsed time since an arbitray point in the past") + + if osname == 'posix': def wait(): """ wait() -> (pid, status) diff --git a/pypy/module/posix/interp_posix.py b/pypy/module/posix/interp_posix.py --- a/pypy/module/posix/interp_posix.py +++ b/pypy/module/posix/interp_posix.py @@ -684,11 +684,17 @@ except OSError as e: raise wrap_oserror(space, e, eintr_retry=False) else: - return space.newtuple([space.newfloat(times[0]), - space.newfloat(times[1]), - space.newfloat(times[2]), - space.newfloat(times[3]), - space.newfloat(times[4])]) + w_keywords = space.newdict() + w_tuple = space.newtuple([space.newfloat(times[0]), + space.newfloat(times[1]), + space.newfloat(times[2]), + space.newfloat(times[3]), + space.newfloat(times[4])]) + + w_times_result = space.getattr(space.getbuiltinmodule(os.name), + space.newtext('times_result')) + return space.call_function(w_times_result, w_tuple, w_keywords) + @unwrap_spec(command='fsencode') def system(space, command): From pypy.commits at gmail.com Wed Nov 15 10:17:34 2017 From: pypy.commits at gmail.com (thisch) Date: Wed, 15 Nov 2017 07:17:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Improve test for posix.times() Message-ID: <5a0c5a8e.88acdf0a.4f669.fa2a@mx.google.com> Author: 
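The visible effect of the change is that os.times() keeps its old 5-tuple behaviour while also exposing named fields. A quick sketch of what the new tests check, valid on Python 3.3+ (the actual values are machine-dependent):

    import os

    t = os.times()
    user, system, cuser, csystem, elapsed = t   # old tuple unpacking still works
    assert t.user == user
    assert t.children_system == csystem
    assert t.elapsed == elapsed
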
Thomas Hisch Branch: py3.5 Changeset: r93041:898194d1902c Date: 2017-11-13 23:44 +0100 http://bitbucket.org/pypy/pypy/changeset/898194d1902c/ Log: Improve test for posix.times() Test that posix.times() returns a times_result object. diff --git a/pypy/module/posix/test/test_posix2.py b/pypy/module/posix/test/test_posix2.py --- a/pypy/module/posix/test/test_posix2.py +++ b/pypy/module/posix/test/test_posix2.py @@ -378,16 +378,21 @@ def test_times(self): """ - posix.times() should return a five-tuple giving float-representations - (seconds, effectively) of the four fields from the underlying struct - tms and the return value. + posix.times() should return a posix.times_result object giving + float-representations (seconds, effectively) of the four fields from + the underlying struct tms and the return value. """ result = self.posix.times() - assert isinstance(result, tuple) + assert isinstance(self.posix.times(), self.posix.times_result) + assert isinstance(self.posix.times(), tuple) assert len(result) == 5 for value in result: assert isinstance(value, float) - + assert isinstance(result.user, float) + assert isinstance(result.system, float) + assert isinstance(result.children_user, float) + assert isinstance(result.children_system, float) + assert isinstance(result.elapsed, float) def test_strerror(self): assert isinstance(self.posix.strerror(0), str) From pypy.commits at gmail.com Wed Nov 15 11:24:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 08:24:21 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix test to work on PyPy Message-ID: <5a0c6a35.8fa3df0a.b0eb2.5da0@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93043:12f1fb4860ac Date: 2017-11-15 16:23 +0000 http://bitbucket.org/pypy/pypy/changeset/12f1fb4860ac/ Log: Fix test to work on PyPy diff --git a/lib-python/3/test/test_inspect.py b/lib-python/3/test/test_inspect.py --- a/lib-python/3/test/test_inspect.py +++ b/lib-python/3/test/test_inspect.py @@ -765,12 +765,15 @@ self.assertFullArgSpecEquals(_pickle.Pickler(io.BytesIO()).dump, args_e=['self', 'obj'], formatted='(self, obj)') + # platform-dependent on PyPy + default_fd = os.stat.__kwdefaults__['dir_fd'] + self.assertFullArgSpecEquals( os.stat, args_e=['path'], kwonlyargs_e=['dir_fd', 'follow_symlinks'], - kwonlydefaults_e={'dir_fd': None, 'follow_symlinks': True}, - formatted='(path, *, dir_fd=None, follow_symlinks=True)') + kwonlydefaults_e={'dir_fd': default_fd, 'follow_symlinks': True}, + formatted='(path, *, dir_fd={}, follow_symlinks=True)'.format(default_fd)) @unittest.skipIf(MISSING_C_DOCSTRINGS, "Signature information for builtins requires docstrings") From pypy.commits at gmail.com Wed Nov 15 11:44:17 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 15 Nov 2017 08:44:17 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: general progress towards moving more of the infrastructure from runicode towards unicodehelper, which helps us to deal with surrogates nicely Message-ID: <5a0c6ee1.7a94500a.793c7.17fe@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93044:1d6d78e72d50 Date: 2017-11-15 17:43 +0100 http://bitbucket.org/pypy/pypy/changeset/1d6d78e72d50/ Log: general progress towards moving more of the infrastructure from runicode towards unicodehelper, which helps us to deal with surrogates nicely diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,6 +1,7 @@ from pypy.interpreter.error import 
OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import runicode, rutf8 +from rpython.rlib.rarithmetic import r_uint from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -43,6 +44,15 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, encoding, errors) +def combine_flags(one, two): + if one == rutf8.FLAG_ASCII and two == rutf8.FLAG_ASCII: + return rutf8.FLAG_ASCII + elif (one == rutf8.FLAG_HAS_SURROGATES or + two == rutf8.FLAG_HAS_SURROGATES): + return rutf8.FLAG_HAS_SURROGATES + return rutf8.FLAG_REGULAR + + def _has_surrogate(u): for c in u: if 0xD800 <= ord(c) <= 0xDFFF: @@ -58,25 +68,221 @@ flag = rutf8.FLAG_REGULAR return flag +def hexescape(builder, s, pos, digits, + encoding, errorhandler, message, errors): + chr = 0 + if pos + digits > len(s): + endinpos = pos + while endinpos < len(s) and s[endinpos] in hexdigits: + endinpos += 1 + uuu + res, size, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + try: + chr = r_uint(int(s[pos:pos+digits], 16)) + except ValueError: + aaaa + endinpos = pos + while s[endinpos] in hexdigits: + endinpos += 1 + res, pos = errorhandler(errors, encoding, + message, s, pos-2, endinpos) + builder.append(res) + else: + # when we get here, chr is a 32-bit unicode character + if chr > 0x10ffff: + UUU + message = "illegal Unicode character" + res, pos = errorhandler(errors, encoding, + message, s, pos-2, pos+digits) + builder.append(res) + else: + rutf8.unichr_as_utf8_append(builder, chr, True) + if chr <= 0x7f: + flag = rutf8.FLAG_ASCII + elif 0xd800 <= chr <= 0xdfff: + flag = rutf8.FLAG_HAS_SURROGATES + else: + flag = rutf8.FLAG_REGULAR + pos += digits + size = 1 + + return pos, size, flag + +def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + flag = rutf8.FLAG_ASCII + builder = StringBuilder(size) + pos = 0 + outsize = 0 + while pos < size: + ch = s[pos] + + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + if ord(ch) > 0x7F: + rutf8.unichr_as_utf8_append(builder, ord(ch)) + flag = combine_flags(rutf8.FLAG_REGULAR, flag) + else: + builder.append(ch) + pos += 1 + outsize += 1 + continue + + # - Escapes + pos += 1 + if pos >= size: + message = "\\ at end of string" + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, size) + newsize, newflag = rutf8.check_utf8(res, True) + outsize + newsize + flag = combine_flags(flag, newflag) + builder.append(res) + continue + + ch = s[pos] + pos += 1 + # \x escapes + if ch == '\n': pass + elif ch == '\\': + builder.append('\\') + outsize += 1 + elif ch == '\'': + builder.append('\'') + outsize += 1 + elif ch == '\"': + builder.append('\"') + outsize += 1 + elif ch == 'b' : + builder.append('\b') + outsize += 1 + elif ch == 'f' : + builder.append('\f') + outsize += 1 + elif ch == 't' : + builder.append('\t') + outsize += 1 + elif ch == 'n' : + builder.append('\n') + outsize += 1 + elif ch == 'r' : + builder.append('\r') + outsize += 1 + elif ch == 'v' : + builder.append('\v') + outsize += 1 + elif ch == 'a' : + builder.append('\a') + outsize += 1 + elif '0' <= ch <= '7': + x = ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + if pos < size: + ch = s[pos] + if '0' <= ch <= '7': + pos += 1 + x = (x<<3) + ord(ch) - ord('0') + outsize += 1 + if x >= 
0x7F: + rutf8.unichr_as_utf8_append(builder, x) + flag = combine_flags(rutf8.FLAG_REGULAR, flag) + else: + builder.append(chr(x)) + # hex escapes + # \xXX + elif ch == 'x': + digits = 2 + message = "truncated \\xXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \uXXXX + elif ch == 'u': + digits = 4 + message = "truncated \\uXXXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \UXXXXXXXX + elif ch == 'U': + digits = 8 + message = "truncated \\UXXXXXXXX escape" + pos, newsize, newflag = hexescape(builder, s, pos, digits, + "unicodeescape", errorhandler, message, errors) + flag = combine_flags(flag, newflag) + outsize += newsize + + # \N{name} + elif ch == 'N' and ud_handler is not None: + message = "malformed \\N character escape" + look = pos + + if look < size and s[look] == '{': + # look for the closing brace + while look < size and s[look] != '}': + look += 1 + if look < size and s[look] == '}': + # found a name. look it up in the unicode database + message = "unknown Unicode character name" + name = s[pos+1:look] + code = ud_handler.call(name) + if code < 0: + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize + builder.append(res) + continue + pos = look + 1 + XXX + if code <= MAXUNICODE: + builder.append(UNICHR(code)) + else: + code -= 0x10000L + builder.append(unichr(0xD800 + (code >> 10))) + builder.append(unichr(0xDC00 + (code & 0x03FF))) + else: + YYY + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + AAA + res, pos = errorhandler(errors, "unicodeescape", + message, s, pos-1, look+1) + builder.append(res) + else: + builder.append('\\') + builder.append(ch) + outsize += 2 + + return builder.build(), pos, outsize, flag + # These functions take and return unwrapped rpython strings and unicodes def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) unicodedata_handler = state.get_unicodedata_handler(space) - # XXX pick better length, maybe - # XXX that guy does not belong in runicode (nor in rutf8) - result_u, consumed = runicode.str_decode_unicode_escape( - string, len(string), "strict", - final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle, - unicodedata_handler=unicodedata_handler) - # XXX argh. 
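At the application level this helper backs the familiar unicode_escape codec; a short reminder of the behaviour the \x and \u branches above implement (works on both Python 2 and 3):

    data = b"caf\\xe9 \\u2603"            # the escape sequences are literal bytes
    text = data.decode("unicode_escape")
    assert text == u"caf\xe9 \u2603"      # e-acute and a snowman
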
we want each surrogate to be encoded separately - utf8 = result_u.encode('utf8') - if rutf8.first_non_ascii_char(utf8) == -1: - flag = rutf8.FLAG_ASCII - elif _has_surrogate(result_u): - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR - return utf8, len(result_u), flag + result_utf8, consumed, length, flag = str_decode_unicode_escape( + string, "strict", + final=True, + errorhandler=decode_error_handler(space), + ud_handler=unicodedata_handler) + return result_utf8, length, flag def decode_raw_unicode_escape(space, string): # XXX pick better length, maybe @@ -111,8 +317,10 @@ try: length, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError as e: + # convert position into unicode position + lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, - e.pos, e.pos + 1) + lgt, lgt + 1) assert False, "unreachable" return length, flag @@ -131,23 +339,28 @@ # DEPRECATED return (s, check_utf8_or_raise(space, s)) -def utf8_encode_ascii(utf8, utf8len, errors, errorhandler): - if len(utf8) == utf8len: - return utf8 - # No Way At All to emulate the calls to the error handler in - # less than three pages, so better not. - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return runicode.unicode_encode_ascii(u, len(u), errors, w.handle) - -def str_decode_ascii(s, slen, errors, final, errorhandler): +def str_decode_ascii(s, errors, final, errorhandler): try: rutf8.check_ascii(s) - return s, slen, len(s), rutf8.FLAG_ASCII + return s, len(s), len(s), rutf8.FLAG_ASCII except rutf8.CheckError: - w = DecodeWrapper((errorhandler)) - u, pos = runicode.str_decode_ascii(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) + return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + +def _str_decode_ascii_slowpath(s, errors, final, errorhandler): + i = 0 + res = StringBuilder() + while i < len(s): + ch = s[i] + if ord(ch) > 0x7F: + r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', + s, i, i + 1) + res.append(r) + else: + res.append(ch) + i += 1 + ress = res.build() + lgt, flag = rutf8.check_utf8(ress, True) + return ress, len(s), lgt, flag # XXX wrappers, think about speed @@ -165,21 +378,14 @@ def handle(self, errors, encoding, msg, s, pos, endpos): return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) -# some irregular interfaces -def str_decode_utf8(s, slen, errors, final, errorhandler): - w = DecodeWrapper(errorhandler) - u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, - runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u), _get_flag(u) +#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): +# w = DecodeWrapper(errorhandler) +# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, +# w.handle, +# ud_handler) +# return u.encode('utf8'), pos, len(u), _get_flag(u) -def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): - w = DecodeWrapper(errorhandler) - u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, - w.handle, - ud_handler) - return u.encode('utf8'), pos, len(u), _get_flag(u) - -def setup_new_encoders(encoding): +def setup_new_encoders_legacy(encoding): encoder_name = 'utf8_encode_' + encoding encoder_call_name = 'unicode_encode_' + encoding decoder_name = 'str_decode_' + encoding @@ -200,9 +406,322 @@ globals()[decoder_name] = decoder def setup(): - for encoding in ['utf_7', 
'unicode_escape', 'raw_unicode_escape', + for encoding in ['raw_unicode_escape', 'utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', 'utf_32_be', 'latin_1', 'unicode_internal']: - setup_new_encoders(encoding) + setup_new_encoders_legacy(encoding) setup() + +def utf8_encode_ascii(utf8, errors, errorhandler): + """ Don't be confused - this is a slowpath for errors e.g. "ignore" + or an obscure errorhandler + """ + res = StringBuilder() + i = 0 + pos = 0 + while i < len(utf8): + ch = rutf8.codepoint_at_pos(utf8, i) + if ch >= 0x7F: + msg = "ordinal not in range(128)" + r, newpos = errorhandler(errors, 'ascii', msg, utf8, + pos, pos + 1) + for _ in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + pos = newpos + res.append(r) + else: + res.append(chr(ch)) + i = rutf8.next_codepoint_pos(utf8, i) + pos += 1 + + s = res.build() + return s + +# some irregular interfaces +def str_decode_utf8(s, slen, errors, final, errorhandler): + xxxx + + u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, + runicode.allow_surrogate_by_default) + return u.encode('utf8'), pos, len(u), _get_flag(u) + +# ____________________________________________________________ +# utf-7 + +# Three simple macros defining base-64 + +def _utf7_IS_BASE64(oc): + "Is c a base-64 character?" + c = chr(oc) + return c.isalnum() or c == '+' or c == '/' +def _utf7_TO_BASE64(n): + "Returns the base-64 character of the bottom 6 bits of n" + return "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[n & 0x3f] +def _utf7_FROM_BASE64(c): + "given that c is a base-64 character, what is its base-64 value?" + if c >= 'a': + return ord(c) - 71 + elif c >= 'A': + return ord(c) - 65 + elif c >= '0': + return ord(c) + 4 + elif c == '+': + return 62 + else: # c == '/' + return 63 + +def _utf7_DECODE_DIRECT(oc): + return oc <= 127 and oc != ord('+') + +# The UTF-7 encoder treats ASCII characters differently according to +# whether they are Set D, Set O, Whitespace, or special (i.e. none of +# the above). See RFC2152. This array identifies these different +# sets: +# 0 : "Set D" +# alphanumeric and '(),-./:? +# 1 : "Set O" +# !"#$%&*;<=>@[]^_`{|} +# 2 : "whitespace" +# ht nl cr sp +# 3 : special (must be base64 encoded) +# everything else (i.e. +\~ and non-printing codes 0-8 11-12 14-31 127) + +utf7_category = [ +# nul soh stx etx eot enq ack bel bs ht nl vt np cr so si + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3, +# dle dc1 dc2 dc3 dc4 nak syn etb can em sub esc fs gs rs us + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +# sp ! " # $ % & ' ( ) * + , - . / + 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, +# 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, +# @ A B C D E F G H I J K L M N O + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# P Q R S T U V W X Y Z [ \ ] ^ _ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1, +# ` a b c d e f g h i j k l m n o + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +# p q r s t u v w x y z { | } ~ del + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, +] + +# ENCODE_DIRECT: this character should be encoded as itself. The +# answer depends on whether we are encoding set O as itself, and also +# on whether we are encoding whitespace as itself. RFC2152 makes it +# clear that the answers to these questions vary between +# applications, so this code needs to be flexible. 
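The base-64 helpers and the category table above reimplement, for RPython, what the stdlib utf-7 codec already does at app level, which makes the stdlib handy as a behavioural reference:

    s = u"plus + sign and caf\xe9"
    wire = s.encode("utf-7")       # '+' is emitted as '+-', the e-acute as a base64 run
    assert wire.decode("utf-7") == s
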
+ +def _utf7_ENCODE_DIRECT(oc, directO, directWS): + return(oc < 128 and oc > 0 and + (utf7_category[oc] == 0 or + (directWS and utf7_category[oc] == 2) or + (directO and utf7_category[oc] == 1))) + +def _utf7_ENCODE_CHAR(result, oc, base64bits, base64buffer): + if oc >= 0x10000: + # code first surrogate + base64bits += 16 + base64buffer = (base64buffer << 16) | 0xd800 | ((oc-0x10000) >> 10) + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + # prepare second surrogate + oc = 0xDC00 | ((oc-0x10000) & 0x3FF) + base64bits += 16 + base64buffer = (base64buffer << 16) | oc + while base64bits >= 6: + result.append(_utf7_TO_BASE64(base64buffer >> (base64bits-6))) + base64bits -= 6 + return base64bits, base64buffer + +def str_decode_utf_7(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + inShift = False + base64bits = 0 + base64buffer = 0 + surrogate = 0 + outsize = 0 + + result = StringBuilder(size) + pos = 0 + shiftOutStartPos = 0 + flag = rutf8.FLAG_ASCII + startinpos = 0 + while pos < size: + ch = s[pos] + + if inShift: # in a base-64 section + if _utf7_IS_BASE64(ord(ch)): #consume a base-64 character + base64buffer = (base64buffer << 6) | _utf7_FROM_BASE64(ch) + base64bits += 6 + pos += 1 + + if base64bits >= 16: + # enough bits for a UTF-16 value + outCh = base64buffer >> (base64bits - 16) + base64bits -= 16 + base64buffer &= (1 << base64bits) - 1 # clear high bits + assert outCh <= 0xffff + if surrogate: + # expecting a second surrogate + if outCh >= 0xDC00 and outCh <= 0xDFFF: + xxxx + result.append( + UNICHR((((surrogate & 0x3FF)<<10) | + (outCh & 0x3FF)) + 0x10000)) + surrogate = 0 + continue + else: + YYYY + result.append(unichr(surrogate)) + surrogate = 0 + # Not done with outCh: falls back to next line + if outCh >= 0xD800 and outCh <= 0xDBFF: + # first surrogate + surrogate = outCh + else: + flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) + outsize += 1 + rutf8.unichr_as_utf8_append(result, outCh, True) + + else: + # now leaving a base-64 section + inShift = False + + if base64bits > 0: # left-over bits + if base64bits >= 6: + # We've seen at least one base-64 character + aaa + pos += 1 + msg = "partial character in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + result.append(res) + continue + else: + # Some bits remain; they should be zero + if base64buffer != 0: + bbb + pos += 1 + msg = "non-zero padding bits in shift sequence" + res, pos = errorhandler(errors, 'utf7', + msg, s, pos-1, pos) + result.append(res) + continue + + if surrogate and _utf7_DECODE_DIRECT(ord(ch)): + outsize += 1 + flag = rutf8.FLAG_HAS_SURROGATES + rutf8.unichr_as_utf8_append(result, surrogate, True) + surrogate = 0 + + if ch == '-': + # '-' is absorbed; other terminating characters are + # preserved + pos += 1 + + elif ch == '+': + startinpos = pos + pos += 1 # consume '+' + if pos < size and s[pos] == '-': # '+-' encodes '+' + pos += 1 + result.append('+') + outsize += 1 + else: # begin base64-encoded section + inShift = 1 + surrogate = 0 + shiftOutStartPos = result.getlength() + base64bits = 0 + base64buffer = 0 + + elif _utf7_DECODE_DIRECT(ord(ch)): # character decodes at itself + xxx + result.append(unichr(ord(ch))) + pos += 1 + else: + yyy + startinpos = pos + pos += 1 + msg = "unexpected special character" + res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + result.append(res) + + # end of string + final_length = 
result.getlength() + if inShift and final: # in shift sequence, no more to follow + # if we're in an inconsistent state, that's an error + inShift = 0 + if (surrogate or + base64bits >= 6 or + (base64bits > 0 and base64buffer != 0)): + msg = "unterminated shift sequence" + xxxx + res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) + result.append(res) + final_length = result.getlength() + elif inShift: + pos = startinpos + final_length = shiftOutStartPos # back off output + + assert final_length >= 0 + return result.build()[:final_length], pos, outsize, flag + +def utf8_encode_utf_7(s, errors, errorhandler=None): + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + + encodeSetO = encodeWhiteSpace = False + + inShift = False + base64bits = 0 + base64buffer = 0 + + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if not inShift: + if oc == ord('+'): + result.append('+-') + elif _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + result.append(chr(oc)) + else: + result.append('+') + inShift = True + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + else: + if _utf7_ENCODE_DIRECT(oc, not encodeSetO, not encodeWhiteSpace): + # shifting out + if base64bits: # output remaining bits + result.append(_utf7_TO_BASE64(base64buffer << (6-base64bits))) + base64buffer = 0 + base64bits = 0 + + inShift = False + ## Characters not in the BASE64 set implicitly unshift the + ## sequence so no '-' is required, except if the character is + ## itself a '-' + if _utf7_IS_BASE64(oc) or oc == ord('-'): + result.append('-') + result.append(chr(oc)) + else: + base64bits, base64buffer = _utf7_ENCODE_CHAR( + result, oc, base64bits, base64buffer) + pos = rutf8.next_codepoint_pos(s, pos) + + if base64bits: + result.append(_utf7_TO_BASE64(base64buffer << (6 - base64bits))) + if inShift: + result.append('-') + + return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -37,6 +37,7 @@ if decode: w_cls = space.w_UnicodeDecodeError w_input = space.newbytes(input) + length = len(input) else: w_cls = space.w_UnicodeEncodeError length, flag = rutf8.check_utf8(input, allow_surrogates=True) @@ -61,17 +62,13 @@ w_replace, w_newpos = space.fixedview(w_res, 2) newpos = space.int_w(w_newpos) if newpos < 0: - newpos = len(input) + newpos - if newpos < 0 or newpos > len(input): + newpos = length + newpos + if newpos < 0 or newpos > length: raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) w_replace = space.convert_to_w_unicode(w_replace) - replace = w_replace._utf8.decode('utf8') - if decode: - return replace, newpos - else: - return replace, None, newpos + return w_replace._utf8, newpos return call_errorhandler def make_decode_errorhandler(self, space): @@ -384,8 +381,7 @@ func = getattr(unicodehelper, rname) utf8len = w_arg._length # XXX deal with func() returning length or not - result = func(w_arg._utf8, utf8len, - errors, state.encode_error_handler) + result = func(w_arg._utf8, errors, state.encode_error_handler) return space.newtuple([space.newbytes(result), space.newint(utf8len)]) wrap_encoder.func_name = rname globals()[name] = wrap_encoder @@ -403,7 +399,7 @@ final = space.is_true(w_final) state = space.fromcache(CodecState) func = 
getattr(unicodehelper, rname) - result, consumed, length, flag = func(string, len(string), errors, + result, consumed, length, flag = func(string, errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(result, length, flag), space.newint(consumed)]) @@ -476,8 +472,6 @@ try: lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: - # XXX do the way around runicode - we can optimize it later if we - # decide we care about obscure cases res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, len(string), errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), @@ -695,7 +689,7 @@ unicode_name_handler = state.get_unicodedata_handler(space) result, consumed, lgt, flag = unicodehelper.str_decode_unicode_escape( - string, len(string), errors, + string, errors, final, state.decode_error_handler, unicode_name_handler) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -9,7 +9,6 @@ from rpython.rlib.rstring import ( StringBuilder, split, rsplit, UnicodeBuilder, replace_count, startswith, endswith) -from rpython.rlib.runicode import make_unicode_escape_function from rpython.rlib import rutf8, jit from pypy.interpreter import unicodehelper @@ -48,9 +47,16 @@ else: assert flag == rutf8.FLAG_REGULAR self._index_storage = rutf8.null_storage() + # XXX checking, remove before any performance measurments + # ifdef not_running_in_benchmark lgt, flag_check = rutf8.check_utf8(utf8str, True) assert lgt == length - assert flag == flag_check + if flag_check == rutf8.FLAG_ASCII: + # there are cases where we copy part of REULAR that happens + # to be ascii + assert flag in (rutf8.FLAG_ASCII, rutf8.FLAG_REGULAR) + else: + assert flag == flag_check # the storage can be one of: # - null, unicode with no surrogates # - rutf8.UTF8_HAS_SURROGATES @@ -351,7 +357,7 @@ elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) return W_UnicodeObject(builder.build(), self._length, flag) @@ -376,7 +382,7 @@ else: ch = unicodedb.tolower(ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -402,7 +408,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) - flag = self._combine_flags(flag, w_newval._get_flag()) + flag = unicodehelper.combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -411,7 +417,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -535,7 +541,7 @@ while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? 
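The (replacement, newpos) pairs threaded through call_errorhandler are the RPython side of the app-level codecs error-handler protocol, which is easiest to see from plain Python:

    import codecs

    def to_question_mark(exc):
        # an error handler returns (replacement, position to resume at)
        return (u"?", exc.end)

    codecs.register_error("question", to_question_mark)
    assert b"ab\xffcd".decode("ascii", "question") == u"ab?cd"
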
pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -623,15 +629,6 @@ return True return endswith(value, prefix, start, end) - @staticmethod - def _combine_flags(self_flag, other_flag): - if self_flag == rutf8.FLAG_ASCII and other_flag == rutf8.FLAG_ASCII: - return rutf8.FLAG_ASCII - elif (self_flag == rutf8.FLAG_HAS_SURROGATES or - other_flag == rutf8.FLAG_HAS_SURROGATES): - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - def _get_flag(self): if self.is_ascii(): return rutf8.FLAG_ASCII @@ -646,7 +643,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - flag = self._combine_flags(self._get_flag(), w_other._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, self._len() + w_other._len(), flag) @@ -671,7 +668,7 @@ # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) - flag = self._combine_flags(flag, w_u._get_flag()) + flag = unicodehelper.combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -723,7 +720,7 @@ uchar = rutf8.codepoint_at_pos(value, i) uchar = unicodedb.toupper(uchar) if uchar >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) rutf8.unichr_as_utf8_append(builder, uchar) return W_UnicodeObject(builder.build(), self._length, flag) @@ -837,14 +834,14 @@ ch = unicodedb.toupper(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = self._combine_flags(flag, rutf8.FLAG_REGULAR) + flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) @@ -930,7 +927,7 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - flag = self._combine_flags(self._get_flag(), w_by._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) return W_UnicodeObject(res, newlength, flag) @@ -1052,7 +1049,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") - flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -1071,7 +1068,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") - flag = self._combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -452,6 +452,13 @@ ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) )))) +def unichr_to_flag(ch): 
+ if ch <= 0x7F: + return FLAG_ASCII + elif 0xD800 <= ch <= 0xDFFF: + return FLAG_HAS_SURROGATES + return FLAG_REGULAR + FLAG_REGULAR = 0 FLAG_HAS_SURROGATES = 1 FLAG_ASCII = 2 From pypy.commits at gmail.com Wed Nov 15 12:27:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 15 Nov 2017 09:27:40 -0800 (PST) Subject: [pypy-commit] pypy default: fix test_whatsnew Message-ID: <5a0c790c.c78c1c0a.f0d32.730d@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93045:95e0fdd7cd86 Date: 2017-11-15 18:26 +0100 http://bitbucket.org/pypy/pypy/changeset/95e0fdd7cd86/ Log: fix test_whatsnew diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -20,3 +20,9 @@ .. branch: run-extra-tests Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) From pypy.commits at gmail.com Wed Nov 15 13:27:28 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 15 Nov 2017 10:27:28 -0800 (PST) Subject: [pypy-commit] pypy rpython-20: start a branch to play with stronger type guarantees Message-ID: <5a0c8710.8dd71c0a.4e787.9218@mx.google.com> Author: fijal Branch: rpython-20 Changeset: r93046:5f87d65c7f82 Date: 2017-11-15 19:26 +0100 http://bitbucket.org/pypy/pypy/changeset/5f87d65c7f82/ Log: start a branch to play with stronger type guarantees diff --git a/rpython/annotator/model.py b/rpython/annotator/model.py --- a/rpython/annotator/model.py +++ b/rpython/annotator/model.py @@ -47,19 +47,33 @@ allow_int_to_float = True TLS = State() +def compare_dict(d1, d2, ommit): + for k, v in d1.iteritems(): + if k in ommit: + continue + if k not in d2 or v != d2[k]: + return False + for k, v in d2.iteritems(): + if k in ommit: + continue + if k not in d1: # don't need to compare again + return False + return True + class SomeObject(object): """The set of all objects. 
Each instance stands for an arbitrary object about which nothing is known.""" __metaclass__ = extendabletype immutable = False knowntype = object + can_union = True def __init__(self): assert type(self) is not SomeObject def __eq__(self, other): return (self.__class__ is other.__class__ and - self.__dict__ == other.__dict__) + compare_dict(self.__dict__, other.__dict__, ('can_union',))) def __ne__(self, other): return not (self == other) @@ -74,7 +88,7 @@ else: reprdict[self] = True try: - items = self.__dict__.items() + items = [x for x in self.__dict__.items() if x[0] != 'can_union'] items.sort() args = [] for k, v in items: @@ -269,11 +283,10 @@ d1 = self.__dict__ d2 = other.__dict__ if not TLS.check_str_without_nul: - d1 = d1.copy() - d1['no_nul'] = 0 - d2 = d2.copy() - d2['no_nul'] = 0 - return d1 == d2 + ommit = ('no_nul', 'can_union') + else: + ommit = () + return compare_dict(d1, d2, ommit) def nonnoneify(self): return self.__class__(can_be_None=False, no_nul=self.no_nul) @@ -341,11 +354,8 @@ return False if not self.listdef.same_as(other.listdef): return False - selfdic = self.__dict__.copy() - otherdic = other.__dict__.copy() - del selfdic['listdef'] - del otherdic['listdef'] - return selfdic == otherdic + return compare_dict(self.__dict__, other.__dict__, + ('listdef', 'can_union')) def can_be_none(self): return True @@ -383,11 +393,8 @@ return False if not self.dictdef.same_as(other.dictdef): return False - selfdic = self.__dict__.copy() - otherdic = other.__dict__.copy() - del selfdic['dictdef'] - del otherdic['dictdef'] - return selfdic == otherdic + return compare_dict(self.__dict__, other.__dict__, + ('dictdef', 'can_union')) def can_be_none(self): return True @@ -755,8 +762,15 @@ if s1 == s2: # Most pair(...).union() methods deal incorrectly with that case # when constants are involved. 
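compare_dict() above is just "equal except for the bookkeeping keys"; a stand-alone rendering of the same check (using .items() instead of the RPython iteritems, and keeping the original ommit spelling) behaves like this:

    def compare_dict(d1, d2, ommit=()):
        for k, v in d1.items():
            if k in ommit:
                continue
            if k not in d2 or d2[k] != v:
                return False
        return all(k in d1 for k in d2 if k not in ommit)

    assert compare_dict({'knowntype': int, 'can_union': False},
                        {'knowntype': int}, ommit=('can_union',))
    assert not compare_dict({'knowntype': int}, {'knowntype': float})
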
- return s1 - return pair(s1, s2).union() + r = s1 + else: + r = pair(s1, s2).union() + if not s1.can_union and not s1 == r: + raise AnnotatorError("Merging %s and %s forbidden" % (s2, s1)) + if not s2.can_union and not s2 == r: + raise AnnotatorError("Merging %s and %s forbidden" % (s1, s2)) + return r + finally: TLS.no_side_effects_in_union -= 1 @@ -773,6 +787,17 @@ # See comment in union() above if s1 != s2: s1 = pair(s1, s2).union() + for i, s in enumerate(somevalues): + if not s.can_union and not s == s1: + l = [] + for j, _s in enumerate(somevalues): + if i == j: + l.append("* " + repr(_s)) + else: + l.append(" " + repr(_s)) + allargs = "\n".join(l) + raise AnnotatorError("Merging:\n%s\nwill produce %s, * marks strict" + " which cannot be generalized" % (allargs, s1)) return s1 diff --git a/rpython/annotator/test/test_strongly_typed.py b/rpython/annotator/test/test_strongly_typed.py new file mode 100644 --- /dev/null +++ b/rpython/annotator/test/test_strongly_typed.py @@ -0,0 +1,40 @@ + +import py + +from rpython.conftest import option + +from rpython.annotator import model +from rpython.annotator.annrpython import RPythonAnnotator as _RPythonAnnotator + + +class TestAnnotateTestCase: + class RPythonAnnotator(_RPythonAnnotator): + def build_types(self, *args): + s = _RPythonAnnotator.build_types(self, *args) + self.validate() + if option.view: + self.translator.view() + return s + + def build_types(self, func, types): + a = self.RPythonAnnotator() + return a.build_types(func, types) + + def test_simple(self): + def f(a): + return a + + s = model.SomeInteger() + s.can_union = False + self.build_types(f, [s]) + assert s == model.SomeInteger() + + def test_generalize_boom(self): + def f(i): + if i % 15 == 0: + return f(1.5) + return i + + s = model.SomeInteger() + s.can_union = False + py.test.raises(model.AnnotatorError, self.build_types, f, [s]) diff --git a/rpython/doc/signatures.rst b/rpython/doc/signatures.rst new file mode 100644 --- /dev/null +++ b/rpython/doc/signatures.rst @@ -0,0 +1,54 @@ + +Basic types:: + + int - signed machine size integer + r_uint - unsigned machine size integer + r_long/r_ulong/r_longlong/r_ulonglong - various integers + char - single character (byte) + bytes - immutable array of chars + bytes? - nullable bytes + float - double-sized IEEE floating point + +Low level types: + + ll.UCHAR + ll.INT + ... 
+ ll.Array(xxx) + ll.Struct(xxx) + ll.GcStruct(xxx) + ll.GcArray(xxx) + +Container types:: + + list(X) - resizable list of X + array(X) - non-resizable list of X + dict(X, Y) - dict of X keys and Y values + tuple(A, B, C) - tuple of 3 items, A, B, C + list?(X) - nullable list, array or dict + +Classes:: + + class A(object): + _rpython_ = """ + class foobar.A # <- namespace declaration for type name + + a: int + b: list(int) + c: array(int) + """ + +PBCs:: + + space = rpython_pbc("space.ObjSpace", space) - registers PBC under the name "space.ObjSpace", + to be used in signatures + +Examples of a signature:: + + @rpython("int -> int") + def f(a): + return a + + @rpython("space.ObjSpace, int, float -> bytes") + def f(space, i, f): + return space.str_w(space.newbytes(str(i))) From pypy.commits at gmail.com Wed Nov 15 17:09:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 14:09:04 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove obsolete PyPy-specific changes Message-ID: <5a0cbb00.178fdf0a.93bfd.ed98@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93047:4fe92f1fbcbf Date: 2017-11-15 20:40 +0000 http://bitbucket.org/pypy/pypy/changeset/4fe92f1fbcbf/ Log: Remove obsolete PyPy-specific changes diff --git a/lib-python/3/test/test_cmd_line_script.py b/lib-python/3/test/test_cmd_line_script.py --- a/lib-python/3/test/test_cmd_line_script.py +++ b/lib-python/3/test/test_cmd_line_script.py @@ -43,11 +43,7 @@ _loader = __loader__ if __loader__ is BuiltinImporter else type(__loader__) print('__loader__==%a' % _loader) print('__file__==%a' % __file__) -if __cached__ is not None: - # XXX: test_script_compiled on PyPy - assertEqual(__file__, __cached__) - if not __cached__.endswith(('pyc', 'pyo')): - raise AssertionError('has __cached__ but not compiled') +print('__cached__==%a' % __cached__) print('__package__==%r' % __package__) # Check PEP 451 details import os.path @@ -239,9 +235,8 @@ def test_basic_script(self): with support.temp_dir() as script_dir: script_name = _make_test_script(script_dir, 'script') - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(script_name, script_name, script_name, - script_dir, package, + script_dir, None, importlib.machinery.SourceFileLoader) def test_script_compiled(self): @@ -250,9 +245,8 @@ py_compile.compile(script_name, doraise=True) os.remove(script_name) pyc_file = support.make_legacy_pyc(script_name) - package = '' if support.check_impl_detail(pypy=True) else None self._check_script(pyc_file, pyc_file, - pyc_file, script_dir, package, + pyc_file, script_dir, None, importlib.machinery.SourcelessFileLoader) def test_directory(self): From pypy.commits at gmail.com Wed Nov 15 17:09:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 14:09:06 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Always initialise __main__.__loader__ and __main__.__builtins__ (CPython does, it, don't ask me why) Message-ID: <5a0cbb02.88acdf0a.4f669.a427@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93048:6fc0a7040472 Date: 2017-11-15 22:08 +0000 http://bitbucket.org/pypy/pypy/changeset/6fc0a7040472/ Log: Always initialise __main__.__loader__ and __main__.__builtins__ (CPython does, it, don't ask me why) diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -579,6 +579,8 @@ __pypy__.save_module_content_for_future_reload(sys) mainmodule = type(sys)('__main__') + mainmodule.__loader__ = sys.__loader__ + 
mainmodule.__builtins__ = os.__builtins__ sys.modules['__main__'] = mainmodule if not no_site: From pypy.commits at gmail.com Wed Nov 15 22:57:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:57:59 -0800 (PST) Subject: [pypy-commit] pypy default: Kill confusing function callback case in emulate_pbc_call() Message-ID: <5a0d0cc7.3bb0df0a.d4ca0.17f2@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93050:203414415a39 Date: 2016-11-20 17:24 +0000 http://bitbucket.org/pypy/pypy/changeset/203414415a39/ Log: Kill confusing function callback case in emulate_pbc_call() diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -313,17 +313,12 @@ parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a dictionary of call + # self.notify[graph.returnblock] is a set of call # points to this func which triggers a reflow whenever the # return block of this graph has been analysed. - callpositions = self.notify.setdefault(graph.returnblock, {}) + returnpositions = self.notify.setdefault(graph.returnblock, set()) if whence is not None: - if callable(whence): - def callback(): - whence(self, graph) - else: - callback = whence - callpositions[callback] = True + returnpositions.add(whence) # generalize the function's input arguments self.addpendingblock(graph, graph.startblock, inputcells) @@ -574,12 +569,8 @@ self.follow_link(graph, link, constraints) if block in self.notify: - # reflow from certain positions when this block is done - for callback in self.notify[block]: - if isinstance(callback, tuple): - self.reflowfromposition(callback) # callback is a position - else: - callback() + for position in self.notify[block]: + self.reflowfromposition(position) def follow_link(self, graph, link, constraints): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -547,10 +547,8 @@ (position_key, "first") and (position_key, "second"). In general, "unique_key" should somehow uniquely identify where - the call is in the source code, and "callback" can be either a - position_key to reflow from when we see more general results, - or a real callback function that will be called with arguments - # "(annotator, called_graph)" whenever the result is generalized. + the call is in the source code, and "callback" is a + position_key to reflow from when we see more general results. "replace" can be set to a list of old unique_key values to forget now, because the given "unique_key" replaces them. 
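After the annrpython.py change above, self.notify maps a return block to a plain set of positions, and finishing the block simply reflows each of them. A toy version of that bookkeeping, with names invented for the sketch:

    notify = {}

    def register_call(returnblock, position):
        notify.setdefault(returnblock, set()).add(position)

    def block_done(returnblock, reflow):
        for position in notify.get(returnblock, ()):
            reflow(position)

    seen = []
    register_call("ret_block", ("caller_graph", "block7", 3))
    block_done("ret_block", reflow=seen.append)
    assert seen == [("caller_graph", "block7", 3)]
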
diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -2141,28 +2141,6 @@ assert (fdesc.get_s_signatures((2, (), False)) == [([someint,someint],someint)]) - def test_emulated_pbc_call_callback(self): - def f(a,b): - return a + b - from rpython.annotator import annrpython - a = annrpython.RPythonAnnotator() - from rpython.annotator import model as annmodel - - memo = [] - def callb(ann, graph): - memo.append(annmodel.SomeInteger() == ann.binding(graph.getreturnvar())) - - s_f = a.bookkeeper.immutablevalue(f) - s = a.bookkeeper.emulate_pbc_call('f', s_f, [annmodel.SomeInteger(), annmodel.SomeInteger()], - callback=callb) - assert s == annmodel.SomeImpossibleValue() - a.complete() - - assert a.binding(graphof(a, f).getreturnvar()).knowntype == int - assert len(memo) >= 1 - for t in memo: - assert t - def test_iterator_union(self): def it(d): return d.iteritems() From pypy.commits at gmail.com Wed Nov 15 22:57:57 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:57:57 -0800 (PST) Subject: [pypy-commit] pypy default: Simplify code Message-ID: <5a0d0cc5.43aadf0a.d1e02.0e7f@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93049:5e549a04ab94 Date: 2016-11-20 16:57 +0000 http://bitbucket.org/pypy/pypy/changeset/5e549a04ab94/ Log: Simplify code diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -1,5 +1,5 @@ from rpython.annotator.model import ( - s_ImpossibleValue, SomeInteger, s_Bool, union) + s_ImpossibleValue, SomeInteger, s_Bool, union, AnnotatorError) from rpython.annotator.listdef import ListItem from rpython.rlib.objectmodel import compute_hash @@ -51,23 +51,19 @@ s_key = self.s_value - def check_eqfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert s_Bool.contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myeq, self.s_rdict_eqfn, [s_key, s_key], replace=replace_othereq) + if not s_Bool.contains(s): + raise AnnotatorError( "the custom eq function of an r_dict must return a boolean" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myeq, self.s_rdict_eqfn, [s_key, s_key], - replace=replace_othereq, - callback = check_eqfn) - def check_hashfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert SomeInteger().contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myhash, self.s_rdict_hashfn, [s_key], replace=replace_otherhash) + if not SomeInteger().contains(s): + raise AnnotatorError( "the custom hash function of an r_dict must return an integer" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myhash, self.s_rdict_hashfn, [s_key], - replace=replace_otherhash, - callback = check_hashfn) class DictValue(ListItem): From pypy.commits at gmail.com Wed Nov 15 22:58:02 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 19:58:02 -0800 (PST) Subject: [pypy-commit] pypy default: small cleanup Message-ID: <5a0d0cca.53d71c0a.2ddc.2e50@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93051:08eace48ed36 Date: 2016-11-20 22:11 +0000 http://bitbucket.org/pypy/pypy/changeset/08eace48ed36/ Log: small cleanup diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -309,15 +309,14 @@ #___ interface for annotator.bookkeeper _______ def recursivecall(self, graph, 
whence, inputcells): - if isinstance(whence, tuple): + if whence is not None: parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a set of call - # points to this func which triggers a reflow whenever the - # return block of this graph has been analysed. - returnpositions = self.notify.setdefault(graph.returnblock, set()) - if whence is not None: + # self.notify[graph.returnblock] is a set of call + # points to this func which triggers a reflow whenever the + # return block of this graph has been analysed. + returnpositions = self.notify.setdefault(graph.returnblock, set()) returnpositions.add(whence) # generalize the function's input arguments From pypy.commits at gmail.com Thu Nov 16 00:01:04 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 15 Nov 2017 21:01:04 -0800 (PST) Subject: [pypy-commit] pypy default: Clean up rerased: split interp-level ErasingPairIdentity from translator-level IdentityDesc Message-ID: <5a0d1b90.ddb1df0a.bddec.10dc@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93052:4c883891e3d7 Date: 2016-11-21 19:46 +0000 http://bitbucket.org/pypy/pypy/changeset/4c883891e3d7/ Log: Clean up rerased: split interp-level ErasingPairIdentity from translator-level IdentityDesc diff --git a/rpython/rlib/rerased.py b/rpython/rlib/rerased.py --- a/rpython/rlib/rerased.py +++ b/rpython/rlib/rerased.py @@ -15,6 +15,8 @@ """ import sys +from collections import defaultdict + from rpython.annotator import model as annmodel from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.llannotation import lltype_to_annotation @@ -48,34 +50,29 @@ def __deepcopy__(self, memo): return self - def _getdict(self, bk): - try: - dict = bk._erasing_pairs_tunnel - except AttributeError: - dict = bk._erasing_pairs_tunnel = {} - return dict +class IdentityDesc(object): + def __init__(self, bookkeeper): + self.bookkeeper = bookkeeper + self.s_input = annmodel.s_ImpossibleValue + self.reflowpositions = {} - def enter_tunnel(self, bookkeeper, s_obj): - dict = self._getdict(bookkeeper) - s_previousobj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - s_obj = annmodel.unionof(s_previousobj, s_obj) - if s_obj != s_previousobj: - dict[self] = (s_obj, reflowpositions) - for position in reflowpositions: - bookkeeper.annotator.reflowfromposition(position) + def enter_tunnel(self, s_obj): + s_obj = annmodel.unionof(self.s_input, s_obj) + if s_obj != self.s_input: + self.s_input = s_obj + for position in self.reflowpositions: + self.bookkeeper.annotator.reflowfromposition(position) - def leave_tunnel(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - reflowpositions[bookkeeper.position_key] = True - return s_obj + def leave_tunnel(self): + self.reflowpositions[self.bookkeeper.position_key] = True + return self.s_input - def get_input_annotation(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, _ = dict[self] - return s_obj +def _get_desc(bk, identity): + try: + descs = bk._erasing_pairs_descs + except AttributeError: + descs = bk._erasing_pairs_descs = defaultdict(lambda: IdentityDesc(bk)) + return descs[identity] _identity_for_ints = ErasingPairIdentity("int") @@ -94,21 +91,23 @@ _about_ = erase def compute_result_annotation(self, s_obj): - identity.enter_tunnel(self.bookkeeper, s_obj) + desc = _get_desc(self.bookkeeper, 
identity) + desc.enter_tunnel(s_obj) return _some_erased() def specialize_call(self, hop): bk = hop.rtyper.annotator.bookkeeper - s_obj = identity.get_input_annotation(bk) + desc = _get_desc(bk, identity) hop.exception_cannot_occur() - return _rtype_erase(hop, s_obj) + return _rtype_erase(hop, desc.s_input) class Entry(ExtRegistryEntry): _about_ = unerase def compute_result_annotation(self, s_obj): assert _some_erased().contains(s_obj) - return identity.leave_tunnel(self.bookkeeper) + desc = _get_desc(self.bookkeeper, identity) + return desc.leave_tunnel() def specialize_call(self, hop): hop.exception_cannot_occur() @@ -130,6 +129,7 @@ def __init__(self, x, identity): self._x = x self._identity = identity + def __repr__(self): return "Erased(%r, %r)" % (self._x, self._identity) @@ -140,7 +140,7 @@ assert config.translation.taggedpointers, "need to enable tagged pointers to use erase_int" return lltype.cast_int_to_ptr(r_self.lowleveltype, value._x * 2 + 1) bk = r_self.rtyper.annotator.bookkeeper - s_obj = value._identity.get_input_annotation(bk) + s_obj = _get_desc(bk, value._identity).s_input r_obj = r_self.rtyper.getrepr(s_obj) if r_obj.lowleveltype is lltype.Void: return lltype.nullptr(r_self.lowleveltype.TO) @@ -182,9 +182,9 @@ _type_ = Erased def compute_annotation(self): - identity = self.instance._identity + desc = _get_desc(self.bookkeeper, self.instance._identity) s_obj = self.bookkeeper.immutablevalue(self.instance._x) - identity.enter_tunnel(self.bookkeeper, s_obj) + desc.enter_tunnel(s_obj) return _some_erased() # annotation and rtyping support From pypy.commits at gmail.com Thu Nov 16 04:35:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 01:35:07 -0800 (PST) Subject: [pypy-commit] pypy default: cherry-pick a small part of the continulet-no-frame-loop branch and make stack() available to all tests; fix test_f_back when run with -A Message-ID: <5a0d5bcb.7996df0a.4610b.47df@mx.google.com> Author: Antonio Cuni Branch: Changeset: r93053:1cac28ee833b Date: 2017-11-16 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/1cac28ee833b/ Log: cherry-pick a small part of the continulet-no-frame-loop branch and make stack() available to all tests; fix test_f_back when run with -A diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -339,26 +368,8 @@ def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def stack(f=None): - """ - get the call-stack of the caller or the specified frame - """ - if f is None: - f = sys._getframe(1) - res = [] - seen = set() 
- while f: - if f in seen: - # frame loop - res.append('...') - break - seen.add(f) - res.append(f.f_code.co_name) - f = f.f_back - #print res - return res - def bar(c): assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) From pypy.commits at gmail.com Thu Nov 16 06:43:22 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 16 Nov 2017 03:43:22 -0800 (PST) Subject: [pypy-commit] cffi default: Issue #343 [patch by david naylor] Message-ID: <5a0d79da.7996df0a.4610b.6ccd@mx.google.com> Author: Armin Rigo Branch: Changeset: r3051:d5661822dee4 Date: 2017-11-16 12:42 +0100 http://bitbucket.org/cffi/cffi/changeset/d5661822dee4/ Log: Issue #343 [patch by david naylor] Fix test_recompiler for libc++ diff --git a/testing/cffi1/test_recompiler.py b/testing/cffi1/test_recompiler.py --- a/testing/cffi1/test_recompiler.py +++ b/testing/cffi1/test_recompiler.py @@ -2270,7 +2270,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif From pypy.commits at gmail.com Thu Nov 16 06:43:48 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 16 Nov 2017 03:43:48 -0800 (PST) Subject: [pypy-commit] pypy default: CFFI Issue #343 [patch by david naylor] Message-ID: <5a0d79f4.95091c0a.dec4e.aef8@mx.google.com> Author: Armin Rigo Branch: Changeset: r93054:34aff140932c Date: 2017-11-16 12:43 +0100 http://bitbucket.org/pypy/pypy/changeset/34aff140932c/ Log: CFFI Issue #343 [patch by david naylor] Fix test_recompiler for libc++ diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -2271,7 +2271,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif From pypy.commits at gmail.com Thu Nov 16 10:21:27 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 16 Nov 2017 07:21:27 -0800 (PST) Subject: [pypy-commit] pypy refactor-PyFloat_FromString: refactor possible recursion in PyFloat_FromString Message-ID: <5a0dacf7.22a8df0a.e5ef.a448@mx.google.com> Author: Matti Picus Branch: refactor-PyFloat_FromString Changeset: r93055:57019e77c377 Date: 2017-11-16 17:19 +0200 http://bitbucket.org/pypy/pypy/changeset/57019e77c377/ Log: refactor possible recursion in PyFloat_FromString diff --git a/pypy/module/cpyext/floatobject.py b/pypy/module/cpyext/floatobject.py --- a/pypy/module/cpyext/floatobject.py +++ b/pypy/module/cpyext/floatobject.py @@ -1,12 +1,12 @@ from rpython.rtyper.lltypesystem import rffi, lltype +from rpython.rlib import rarithmetic from pypy.module.cpyext.api import (PyObjectFields, bootstrap_function, cpython_struct, CANNOT_FAIL, cpython_api, PyObject, build_type_checkers, CONST_STRING) from pypy.module.cpyext.pyobject import ( make_typedescr, track_reference, from_ref) -from pypy.interpreter.error import OperationError from rpython.rlib.rstruct import runpack -from pypy.objspace.std.floatobject import W_FloatObject +from 
pypy.objspace.std.floatobject import W_FloatObject, basestring_to_float PyFloatObjectStruct = lltype.ForwardReference() PyFloatObject = lltype.Ptr(PyFloatObjectStruct) @@ -66,7 +66,10 @@ """Create a PyFloatObject object based on the string value in str, or NULL on failure. The pend argument is ignored. It remains only for backward compatibility.""" - return space.call_function(space.w_float, w_obj) + # avoid space.call_function(space.w_float, w_obj) since PyFloat_FromString + # could be type.tp_as_number.nb_float which would recurse + value = basestring_to_float(space, w_obj) + return space.newfloat(value) @cpython_api([CONST_STRING, rffi.INT_real], rffi.DOUBLE, error=-1.0) def _PyFloat_Unpack4(space, ptr, le): diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -135,6 +135,28 @@ return space.w_NotImplemented return func_with_new_name(_compare, 'descr_' + opname) +def basestring_to_float(space, w_value): + def _string_to_float(space, w_source, string): + try: + return rfloat.string_to_float(string) + except ParseStringError as e: + raise wrap_parsestringerror(space, e, w_source) + + if space.isinstance_w(w_value, space.w_unicode): + from unicodeobject import unicode_to_decimal_w + value = _string_to_float(space, w_value, + unicode_to_decimal_w(space, w_value)) + else: + try: + value = space.charbuf_w(w_value) + except OperationError as e: + if e.match(space, space.w_TypeError): + raise oefmt( + space.w_TypeError, + "float() argument must be a string or a number") + raise + value = _string_to_float(space, w_value, value) + return value class W_FloatObject(W_Root): """This is a implementation of the app-level 'float' type. @@ -193,32 +215,14 @@ @staticmethod @unwrap_spec(w_x=WrappedDefault(0.0)) def descr__new__(space, w_floattype, w_x): - def _string_to_float(space, w_source, string): - try: - return rfloat.string_to_float(string) - except ParseStringError as e: - raise wrap_parsestringerror(space, e, w_source) - w_value = w_x # 'x' is the keyword argument name in CPython if space.lookup(w_value, "__float__") is not None: w_obj = space.float(w_value) if space.is_w(w_floattype, space.w_float): return w_obj value = space.float_w(w_obj) - elif space.isinstance_w(w_value, space.w_unicode): - from unicodeobject import unicode_to_decimal_w - value = _string_to_float(space, w_value, - unicode_to_decimal_w(space, w_value)) else: - try: - value = space.charbuf_w(w_value) - except OperationError as e: - if e.match(space, space.w_TypeError): - raise oefmt( - space.w_TypeError, - "float() argument must be a string or a number") - raise - value = _string_to_float(space, w_value, value) + value = basestring_to_float(space, w_value) w_obj = space.allocate_instance(W_FloatObject, w_floattype) W_FloatObject.__init__(w_obj, value) return w_obj From pypy.commits at gmail.com Thu Nov 16 11:30:05 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:05 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: a branch where to try to fix issue 2683 in a different (and simpler) way than continulet-no-frame-loop Message-ID: <5a0dbd0d.3bb0df0a.d4ca0.62d7@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93056:5dfc7af8c0ff Date: 2017-11-16 16:22 +0100 http://bitbucket.org/pypy/pypy/changeset/5dfc7af8c0ff/ Log: a branch where to try to fix issue 2683 in a different (and simpler) way than continulet-no-frame-loop From pypy.commits at gmail.com 
Thu Nov 16 11:30:07 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:07 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: cherry pick two failing tests from the branch continulet-no-frame-loop Message-ID: <5a0dbd0f.3bb0df0a.d4ca0.62e0@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93057:37701890010b Date: 2017-11-16 16:25 +0100 http://bitbucket.org/pypy/pypy/changeset/37701890010b/ Log: cherry pick two failing tests from the branch continulet-no-frame- loop diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -365,27 +365,47 @@ assert res == 2002 assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] - def test_f_back(self): + def test_f_back_no_cycles(self): import sys from _continuation import continulet stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back'] + f = sys._getframe(0) + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + c.switch(f) + assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + def foo(c): + bar(c) + # + c = continulet(foo) + assert stack() == ['test_f_back_no_cycles'] + f = c.switch() + assert stack() == ['test_f_back_no_cycles'] + assert stack(f) == ['bar', 'foo'] + c.switch() + + def test_f_back_complex(self): + import sys + from _continuation import continulet + stack = self.stack + # + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back_complex'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] c.switch(sys._getframe(1).f_back) # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # - assert stack() == ['test_f_back'] + assert stack() == ['test_f_back_complex'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -398,15 +418,16 @@ def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack() == ['main', 'test_f_back'] - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is f1_bar # not running, so a loop - assert stack(f1_bar) == ['bar', 'foo', '...'] + assert f3_foo.f_back is None # not running + assert stack() == ['main2', 'test_f_back_complex'] + assert stack(f1_bar) == ['bar', 'foo'] # main() main2() From pypy.commits at gmail.com Thu Nov 16 11:30:09 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 08:30:09 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: WIP: (antocuni, arigato): refactor things so that we no longer need a bottomframe, and that the bottom-most frame of each continulet is always None: this mimics more closely the stack of greenlets on CPython, and avoid building frame cycles. 
The corresponding test_f_back_* are failing right now because they are still checking the old behavior Message-ID: <5a0dbd11.d08edf0a.a9d08.857b@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93058:55154dad821a Date: 2017-11-16 17:28 +0100 http://bitbucket.org/pypy/pypy/changeset/55154dad821a/ Log: WIP: (antocuni, arigato): refactor things so that we no longer need a bottomframe, and that the bottom-most frame of each continulet is always None: this mimics more closely the stack of greenlets on CPython, and avoids building frame cycles. The corresponding test_f_back_* are failing right now because they are still checking the old behavior diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py ---
a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) From pypy.commits at gmail.com Thu Nov 16 13:30:50 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 10:30:50 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: fix and simplify test_f_back_*: now that we hide the frames below bottomframe, a part of the test does not longer makes sense since we don't have any frame to check :) Message-ID: <5a0dd95a.8cabdf0a.e68ae.4d86@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93060:e8f933d33b7e Date: 2017-11-16 18:52 +0100 http://bitbucket.org/pypy/pypy/changeset/e8f933d33b7e/ Log: fix and simplify test_f_back_*: now that we hide the frames below bottomframe, a part of the test does not longer makes sense since we don't have any frame to check :) diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -372,9 +372,9 @@ # def bar(c): f = sys._getframe(0) - assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + assert stack() == ['bar', 'foo'] c.switch(f) - assert stack() == ['bar', 'foo', 'test_f_back_no_cycles'] + assert stack() == ['bar', 'foo'] def foo(c): bar(c) # @@ -391,17 +391,13 @@ stack = self.stack # def bar(c): - assert stack() == ['bar', 'foo', 'test_f_back_complex'] + assert stack() == ['bar', 'foo'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) # - assert stack() == ['bar', 'foo', 'main', 'test_f_back_complex'] + assert stack() == ['bar', 'foo'] c.switch(sys._getframe(1).f_back) - # - assert stack() == ['bar', 'foo', 'main2', 'test_f_back_complex'] - assert sys._getframe(2) is f3_foo.f_back - c.switch(sys._getframe(2)) def foo(c): bar(c) # @@ -416,21 +412,13 @@ assert f1_bar.f_back is f3_foo # def main(): - f4_main = c.switch() - assert f4_main.f_code.co_name == 'main' + f4_None = c.switch() + assert f4_None is None assert f3_foo.f_back is None # not running assert stack() == ['main', 'test_f_back_complex'] assert stack(f1_bar) == ['bar', 'foo'] # - def main2(): - f5_main2 = c.switch() - assert f5_main2.f_code.co_name == 'main2' - assert f3_foo.f_back is None # not running - assert stack() == ['main2', 'test_f_back_complex'] - assert stack(f1_bar) == ['bar', 'foo'] - # main() - main2() res = c.switch() assert res is None assert f3_foo.f_back is None From pypy.commits at gmail.com Thu Nov 16 13:30:52 2017 From: pypy.commits at gmail.com (antocuni) Date: Thu, 16 Nov 2017 10:30:52 -0800 (PST) Subject: [pypy-commit] pypy continulet-no-frame-loop-2: fix permute, and rewrite the corresponding test since we can no longer check what is the 'back' frame Message-ID: <5a0dd95c.22a8df0a.e5ef.d86a@mx.google.com> Author: Antonio Cuni Branch: continulet-no-frame-loop-2 Changeset: r93061:8c14e037eea6 Date: 2017-11-16 19:21 +0100 http://bitbucket.org/pypy/pypy/changeset/8c14e037eea6/ Log: fix permute, and rewrite the corresponding test since we can no longer check what is the 'back' frame diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- 
a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -280,8 +280,7 @@ # if len(contlist) > 1: otherh = contlist[-1].h - otherb = contlist[-1].bottomframe.f_backref + otherb = contlist[-1].backframeref for cont in contlist: otherh, cont.h = cont.h, otherh - b = cont.bottomframe - otherb, b.f_backref = b.f_backref, otherb + otherb, cont.backframeref = cont.backframeref, otherb diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -714,24 +714,31 @@ import sys from _continuation import continulet, permute # - def f1(c1): - res = c1.switch() - assert res == "ok" - return "done" + def a(c): + seen.append(2) + res = c.switch() + assert res == 'b' + seen.append(6) + return 'a' + def b(c): + seen.append(3) + c.switch() + seen.append(5) + return 'b' # - def f2(c2): - assert sys._getframe(1).f_code.co_name == 'main' - permute(c1, c2) - assert sys._getframe(1).f_code.co_name == 'f1' - return "ok" - # - c1 = continulet(f1) - c2 = continulet(f2) - def main(): - c1.switch() - res = c2.switch() - assert res == "done" - main() + seen = [] + c1 = continulet(a) + c2 = continulet(b) + seen.append(1) + c1.switch() + c2.switch() + seen.append(4) + permute(c1, c2) + res = c1.switch() + assert res == 'a' + assert not c2.is_pending() + seen.append(7) + assert seen == [1, 2, 3, 4, 5, 6, 7] def test_permute_noninitialized(self): from _continuation import continulet, permute From pypy.commits at gmail.com Thu Nov 16 14:12:31 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 16 Nov 2017 11:12:31 -0800 (PST) Subject: [pypy-commit] pypy refactor-PyFloat_FromString: document and close branch to merge Message-ID: <5a0de31f.55281c0a.7303d.572b@mx.google.com> Author: Matti Picus Branch: refactor-PyFloat_FromString Changeset: r93062:2bf28f126b37 Date: 2017-11-16 20:31 +0200 http://bitbucket.org/pypy/pypy/changeset/2bf28f126b37/ Log: document and close branch to merge diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,8 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: refactor-PyFloat_FromString +Refactor PyFloat_FromString so it can be used inside nb_float, together with +a pull request to NumPy makes string ndarray float(a) and a.__float__() follow the +same code path From pypy.commits at gmail.com Thu Nov 16 14:41:08 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 11:41:08 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Let SyntaxError tracebacks show the bad code line (hard to test) Message-ID: <5a0de9d4.01141c0a.f0840.945f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93063:12061ebde67d Date: 2017-11-16 19:40 +0000 http://bitbucket.org/pypy/pypy/changeset/12061ebde67d/ Log: Let SyntaxError tracebacks show the bad code line (hard to test) diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -14,7 +14,20 @@ def wrap_info(self, space): w_text = w_filename = space.w_None offset = self.offset - if self.text is not None: + w_lineno = space.newint(self.lineno) + if self.filename is not None: + w_filename = space.newfilename(self.filename) + if self.text is None and self.filename is not None: + w_text = space.appexec([w_filename, w_lineno], + """(filename, lineno): + try: + with open(filename) as f: + for _ in range(lineno): + f.read() + return f.read() + except: # we can't allow any exceptions here! + return None""") + elif self.text is not None: from rpython.rlib.runicode import str_decode_utf_8 # self.text may not be UTF-8 in case of decoding errors. # adjust the encoded text offset to a decoded offset @@ -29,20 +42,15 @@ text, _ = str_decode_utf_8(self.text, len(self.text), 'replace') w_text = space.newunicode(text) - if self.filename is not None: - w_filename = space.newfilename(self.filename) - return space.newtuple([space.newtext(self.msg), - space.newtuple([w_filename, - space.newint(self.lineno), - space.newint(offset), - w_text, - space.newint(self.lastlineno)])]) + return space.newtuple([ + space.newtext(self.msg), + space.newtuple([ + w_filename, w_lineno, space.newint(offset), + w_text, space.newint(self.lastlineno)])]) def __str__(self): - return "%s at pos (%d, %d) in %r" % (self.__class__.__name__, - self.lineno, - self.offset, - self.text) + return "%s at pos (%d, %d) in %r" % ( + self.__class__.__name__, self.lineno, self.offset, self.text) class IndentationError(SyntaxError): pass @@ -51,10 +59,11 @@ def __init__(self, lineno=0, offset=0, text=None, filename=None, lastlineno=0): msg = "inconsistent use of tabs and spaces in indentation" - IndentationError.__init__(self, msg, lineno, offset, text, filename, lastlineno) + IndentationError.__init__( + self, msg, lineno, offset, text, filename, lastlineno) class ASTError(Exception): - def __init__(self, msg, ast_node ): + def __init__(self, msg, ast_node): self.msg = msg self.ast_node = ast_node diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -733,10 +733,14 @@ self.w_msg = args_w[0] if len(args_w) == 2: values_w = space.fixedview(args_w[1]) - if len(values_w) > 0: self.w_filename = values_w[0] - if len(values_w) > 1: self.w_lineno = values_w[1] - if len(values_w) > 2: self.w_offset = values_w[2] - if len(values_w) > 3: self.w_text = values_w[3] + if len(values_w) > 0: + self.w_filename = values_w[0] + if len(values_w) > 1: + self.w_lineno = values_w[1] + if len(values_w) > 2: + 
self.w_offset = values_w[2] + if len(values_w) > 3: + self.w_text = values_w[3] if len(values_w) > 4: self.w_lastlineno = values_w[4] # PyPy extension # kill the extra items from args_w to prevent undesired effects From pypy.commits at gmail.com Thu Nov 16 15:21:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 12:21:03 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip CPython-specific test Message-ID: <5a0df32f.d7941c0a.eed22.57d7@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93064:44b2fd82cbfa Date: 2017-11-16 20:20 +0000 http://bitbucket.org/pypy/pypy/changeset/44b2fd82cbfa/ Log: Skip CPython-specific test diff --git a/lib-python/3/test/test_cprofile.py b/lib-python/3/test/test_cprofile.py --- a/lib-python/3/test/test_cprofile.py +++ b/lib-python/3/test/test_cprofile.py @@ -1,7 +1,7 @@ """Test suite for the cProfile module.""" import sys -from test.support import run_unittest, TESTFN, unlink +from test.support import run_unittest, TESTFN, unlink, cpython_only # rip off all interesting stuff from test_profile import cProfile @@ -16,6 +16,7 @@ return _ProfileOutput # Issue 3895. + @cpython_only def test_bad_counter_during_dealloc(self): import _lsprof # Must use a file as StringIO doesn't trigger the bug. From pypy.commits at gmail.com Thu Nov 16 20:52:10 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 17:52:10 -0800 (PST) Subject: [pypy-commit] pypy py3.5: (pjenvey) fix SyntaxError.wrap_info() Message-ID: <5a0e40ca.078bdf0a.27561.4946@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93065:cce3bc563868 Date: 2017-11-17 01:51 +0000 http://bitbucket.org/pypy/pypy/changeset/cce3bc563868/ Log: (pjenvey) fix SyntaxError.wrap_info() diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -23,8 +23,8 @@ try: with open(filename) as f: for _ in range(lineno): - f.read() - return f.read() + f.readline() + return f.readline() except: # we can't allow any exceptions here! 
return None""") elif self.text is not None: From pypy.commits at gmail.com Thu Nov 16 22:28:55 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 16 Nov 2017 19:28:55 -0800 (PST) Subject: [pypy-commit] pypy default: Convert ListItem.read_locations from dict to set Message-ID: <5a0e5777.c4d51c0a.55a59.9091@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93066:e2f076c1fae4 Date: 2017-11-17 03:09 +0000 http://bitbucket.org/pypy/pypy/changeset/e2f076c1fae4/ Log: Convert ListItem.read_locations from dict to set diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -89,11 +89,11 @@ self.force_non_null = force_non_null def read_key(self, position_key): - self.dictkey.read_locations[position_key] = True + self.dictkey.read_locations.add(position_key) return self.dictkey.s_value def read_value(self, position_key): - self.dictvalue.read_locations[position_key] = True + self.dictvalue.read_locations.add(position_key) return self.dictvalue.s_value def same_as(self, other): diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py --- a/rpython/annotator/listdef.py +++ b/rpython/annotator/listdef.py @@ -30,7 +30,7 @@ self.s_value = s_value self.bookkeeper = bookkeeper self.itemof = {} # set of all ListDefs using this ListItem - self.read_locations = {} + self.read_locations = set() if bookkeeper is None: self.dont_change_any_more = True @@ -95,7 +95,7 @@ self.notify_update() if s_new_value != s_other_value: other.notify_update() - self.read_locations.update(other.read_locations) + self.read_locations |= other.read_locations def patch(self): for listdef in self.itemof: @@ -130,7 +130,7 @@ self.listitem.itemof[self] = True def read_item(self, position_key): - self.listitem.read_locations[position_key] = True + self.listitem.read_locations.add(position_key) return self.listitem.s_value def same_as(self, other): From pypy.commits at gmail.com Fri Nov 17 03:18:45 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 17 Nov 2017 00:18:45 -0800 (PST) Subject: [pypy-commit] pypy default: graft part of edb8f85891e5 that un-breaks own tests on win32 Message-ID: <5a0e9b65.929bdf0a.ea12b.f13b@mx.google.com> Author: Matti Picus Branch: Changeset: r93067:a8d2e8dc97fa Date: 2017-11-17 10:17 +0200 http://bitbucket.org/pypy/pypy/changeset/a8d2e8dc97fa/ Log: graft part of edb8f85891e5 that un-breaks own tests on win32 diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform.startswith('linux'): + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): From pypy.commits at gmail.com Fri Nov 17 08:05:13 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 17 Nov 2017 05:05:13 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: get back to the point of passing objspace tests with utf8 turnaround removed from codecs Message-ID: <5a0ede89.5d87df0a.a0b86.fe98@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93068:1e43261d5fd9 Date: 2017-11-17 14:04 +0100 
http://bitbucket.org/pypy/pypy/changeset/1e43261d5fd9/ Log: get back to the point of passing objspace tests with utf8 turnaround removed from codecs diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -68,6 +68,308 @@ flag = rutf8.FLAG_REGULAR return flag +# These functions take and return unwrapped rpython strings +def decode_unicode_escape(space, string): + state = space.fromcache(interp_codecs.CodecState) + unicodedata_handler = state.get_unicodedata_handler(space) + result_utf8, consumed, length, flag = str_decode_unicode_escape( + string, "strict", + final=True, + errorhandler=decode_error_handler(space), + ud_handler=unicodedata_handler) + return result_utf8, length, flag + +def decode_raw_unicode_escape(space, string): + result_utf8, consumed, lgt, flag = str_decode_raw_unicode_escape( + string, "strict", + final=True, errorhandler=decode_error_handler(space)) + return result_utf8, lgt, flag + +def check_ascii_or_raise(space, string): + try: + rutf8.check_ascii(string) + except rutf8.CheckError as e: + decode_error_handler(space)('strict', 'ascii', + 'ordinal not in range(128)', string, + e.pos, e.pos + 1) + assert False, "unreachable" + +def check_utf8_or_raise(space, string): + # Surrogates are accepted and not treated specially at all. + # If there happen to be two 3-bytes encoding a pair of surrogates, + # you still get two surrogate unicode characters in the result. + # These are the Python2 rules; Python3 differs. + try: + length, flag = rutf8.check_utf8(string, allow_surrogates=True) + except rutf8.CheckError as e: + # convert position into unicode position + lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) + decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, + lgt, lgt + 1) + assert False, "unreachable" + return length, flag + +def decode_utf8(space, s): + # DEPRECATED + return (s, check_utf8_or_raise(space, s)) + +def str_decode_ascii(s, errors, final, errorhandler): + try: + rutf8.check_ascii(s) + return s, len(s), len(s), rutf8.FLAG_ASCII + except rutf8.CheckError: + return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + +def _str_decode_ascii_slowpath(s, errors, final, errorhandler): + i = 0 + res = StringBuilder() + while i < len(s): + ch = s[i] + if ord(ch) > 0x7F: + r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', + s, i, i + 1) + res.append(r) + else: + res.append(ch) + i += 1 + ress = res.build() + lgt, flag = rutf8.check_utf8(ress, True) + return ress, len(s), lgt, flag + +def str_decode_latin_1(s, errors, final, errorhandler): + xxx + +def utf8_encode_latin_1(s, errors, errorhandler): + try: + rutf8.check_ascii(s) + return s + except rutf8.CheckError: + return _utf8_encode_latin_1_slowpath(s, errors, errorhandler) + +def _utf8_encode_latin_1_slowpath(s, errors, errorhandler): + res = StringBuilder(len(s)) + size = len(s) + cur = 0 + i = 0 + while i < size: + if ord(s[i]) <= 0x7F: + res.append(s[i]) + else: + oc = rutf8.codepoint_at_pos(s, i) + if oc <= 0xFF: + res.append(chr(oc)) + i += 1 + else: + r, pos = errorhandler(errors, 'latin1', + 'ordinal not in range(256)', s, cur, + cur + 1) + res.append(r) + for j in range(pos - cur): + i = rutf8.next_codepoint_pos(s, i) + cur = pos + cur += 1 + i += 1 + r = res.build() + return r + +class DecodeWrapper(object): + def __init__(self, handler): + self.orig = handler + + def handle(self, errors, encoding, msg, s, pos, endpos): + return self.orig(errors, 
encoding, msg, s, pos, endpos) + +class EncodeWrapper(object): + def __init__(self, handler): + self.orig = handler + + def handle(self, errors, encoding, msg, s, pos, endpos): + return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) + +#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): +# w = DecodeWrapper(errorhandler) +# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, +# w.handle, +# ud_handler) +# return u.encode('utf8'), pos, len(u), _get_flag(u) + +def setup_new_encoders_legacy(encoding): + encoder_name = 'utf8_encode_' + encoding + encoder_call_name = 'unicode_encode_' + encoding + decoder_name = 'str_decode_' + encoding + def encoder(utf8, errors, errorhandler): + u = utf8.decode("utf8") + w = EncodeWrapper(errorhandler) + return getattr(runicode, encoder_call_name)(u, len(u), errors, + w.handle) + def decoder(s, slen, errors, final, errorhandler): + w = DecodeWrapper((errorhandler)) + u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) + return u.encode('utf8'), pos, len(u), _get_flag(u) + encoder.__name__ = encoder_name + decoder.__name__ = decoder_name + if encoder_name not in globals(): + globals()[encoder_name] = encoder + if decoder_name not in globals(): + globals()[decoder_name] = decoder + +def setup(): + for encoding in ['utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', + 'utf_32_be', 'unicode_internal']: + setup_new_encoders_legacy(encoding) + +setup() + +def utf8_encode_ascii(utf8, errors, errorhandler): + """ Don't be confused - this is a slowpath for errors e.g. "ignore" + or an obscure errorhandler + """ + res = StringBuilder() + i = 0 + pos = 0 + while i < len(utf8): + ch = rutf8.codepoint_at_pos(utf8, i) + if ch >= 0x7F: + msg = "ordinal not in range(128)" + r, newpos = errorhandler(errors, 'ascii', msg, utf8, + pos, pos + 1) + for _ in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + pos = newpos + res.append(r) + else: + res.append(chr(ch)) + i = rutf8.next_codepoint_pos(utf8, i) + pos += 1 + + s = res.build() + return s + +def str_decode_utf8(s, errors, final, errorhandler): + """ Same as checking for the valid utf8, but we know the utf8 is not + valid so we're trying to either raise or pack stuff with error handler. 
+ The key difference is that this is call_may_force + """ + slen = len(s) + res = StringBuilder(slen) + pos = 0 + continuation_bytes = 0 + end = len(s) + while pos < end: + ordch1 = ord(s[pos]) + # fast path for ASCII + if ordch1 <= 0x7F: + pos += 1 + res.append(chr(ordch1)) + continue + + if ordch1 <= 0xC1: + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos, pos + 1) + res.append(r) + continue + + pos += 1 + + if ordch1 <= 0xDF: + if pos >= end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + + if rutf8._invalid_byte_2_of_2(ordch2): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz + pos += 1 + continuation_bytes += 1 + res.append(chr(ordch1)) + res.append(chr(ordch2)) + continue + + if ordch1 <= 0xEF: + if (pos + 2) > end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos + 1) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + + if rutf8._invalid_byte_2_of_3(ordch1, ordch2, True): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_3(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + pos += 2 + + # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz + continuation_bytes += 2 + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + continue + + if ordch1 <= 0xF4: + if (pos + 3) > end: + if not final: + break + r, pos = errorhandler(errors, "utf8", "unexpected end of data", + s, pos - 1, pos) + res.append(r) + continue + ordch2 = ord(s[pos]) + ordch3 = ord(s[pos + 1]) + ordch4 = ord(s[pos + 2]) + + if rutf8._invalid_byte_2_of_4(ordch1, ordch2): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos) + res.append(r) + continue + elif rutf8._invalid_byte_3_of_4(ordch3): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 1) + res.append(r) + continue + elif rutf8._invalid_byte_4_of_4(ordch4): + r, pos = errorhandler(errors, "utf8", "invalid continuation byte", + s, pos - 1, pos + 2) + res.append(r) + continue + + pos += 3 + # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz + res.append(chr(ordch1)) + res.append(chr(ordch2)) + res.append(chr(ordch3)) + res.append(chr(ordch4)) + continuation_bytes += 3 + continue + + r, pos = errorhandler(errors, "utf8", "invalid start byte", + s, pos - 1, pos) + res.append(r) + + assert pos == end + assert pos - continuation_bytes >= 0 + r = res.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos - continuation_bytes, lgt, flag + def hexescape(builder, s, pos, digits, encoding, errorhandler, message, errors): chr = 0 @@ -273,178 +575,57 @@ return builder.build(), pos, outsize, flag -# These functions take and return unwrapped rpython strings and unicodes -def decode_unicode_escape(space, string): - state = space.fromcache(interp_codecs.CodecState) - unicodedata_handler = state.get_unicodedata_handler(space) - result_utf8, consumed, length, flag = str_decode_unicode_escape( - string, "strict", - final=True, - errorhandler=decode_error_handler(space), - ud_handler=unicodedata_handler) - return result_utf8, length, flag +# 
____________________________________________________________ +# Raw unicode escape -def decode_raw_unicode_escape(space, string): - # XXX pick better length, maybe - # XXX that guy does not belong in runicode (nor in rutf8) - result_u, consumed = runicode.str_decode_raw_unicode_escape( - string, len(string), "strict", - final=True, errorhandler=DecodeWrapper(decode_error_handler(space)).handle) - # XXX argh. we want each surrogate to be encoded separately - utf8 = ''.join([u.encode('utf8') for u in result_u]) - if rutf8.first_non_ascii_char(utf8) == -1: - flag = rutf8.FLAG_ASCII - elif _has_surrogate(result_u): - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR - return utf8, len(result_u), flag +def str_decode_raw_unicode_escape(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII -def check_ascii_or_raise(space, string): - try: - rutf8.check_ascii(string) - except rutf8.CheckError as e: - decode_error_handler(space)('strict', 'ascii', - 'ordinal not in range(128)', string, - e.pos, e.pos + 1) - assert False, "unreachable" + result = StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] -def check_utf8_or_raise(space, string): - # Surrogates are accepted and not treated specially at all. - # If there happen to be two 3-bytes encoding a pair of surrogates, - # you still get two surrogate unicode characters in the result. - # These are the Python2 rules; Python3 differs. - try: - length, flag = rutf8.check_utf8(string, allow_surrogates=True) - except rutf8.CheckError as e: - # convert position into unicode position - lgt, flags = rutf8.check_utf8(string, True, stop=e.pos) - decode_error_handler(space)('strict', 'utf8', 'invalid utf-8', string, - lgt, lgt + 1) - assert False, "unreachable" - return length, flag + # Non-escape characters are interpreted as Unicode ordinals + if ch != '\\': + rutf8.unichr_as_utf8_append(result, ord(ch), True) + pos += 1 + continue -def encode_utf8(space, uni): - # DEPRECATED - # Note that this function never raises UnicodeEncodeError, - # since surrogates are allowed, either paired or lone. - # A paired surrogate is considered like the non-BMP character - # it stands for. These are the Python2 rules; Python3 differs. 
- return runicode.unicode_encode_utf_8( - uni, len(uni), "strict", - errorhandler=None, - allow_surrogates=True) + # \u-escapes are only interpreted iff the number of leading + # backslashes is odd + bs = pos + while pos < size: + pos += 1 + if pos == size or s[pos] != '\\': + break + result.append('\\') -def decode_utf8(space, s): - # DEPRECATED - return (s, check_utf8_or_raise(space, s)) + # we have a backslash at the end of the string, stop here + if pos >= size: + result.append('\\') + break -def str_decode_ascii(s, errors, final, errorhandler): - try: - rutf8.check_ascii(s) - return s, len(s), len(s), rutf8.FLAG_ASCII - except rutf8.CheckError: - return _str_decode_ascii_slowpath(s, errors, final, errorhandler) + if ((pos - bs) & 1 == 0 or + pos >= size or + (s[pos] != 'u' and s[pos] != 'U')): + result.append('\\') + rutf8.unichr_as_utf8_append(result, ord(s[pos]), True) + pos += 1 + continue -def _str_decode_ascii_slowpath(s, errors, final, errorhandler): - i = 0 - res = StringBuilder() - while i < len(s): - ch = s[i] - if ord(ch) > 0x7F: - r, i = errorhandler(errors, 'ascii', 'ordinal not in range(128)', - s, i, i + 1) - res.append(r) - else: - res.append(ch) - i += 1 - ress = res.build() - lgt, flag = rutf8.check_utf8(ress, True) - return ress, len(s), lgt, flag + digits = 4 if s[pos] == 'u' else 8 + message = "truncated \\uXXXX" + pos += 1 + pos = hexescape(result, s, pos, digits, + "rawunicodeescape", errorhandler, message, errors) -# XXX wrappers, think about speed - -class DecodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s, pos, endpos) - -class EncodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) - -#def str_decode_unicode_escape(s, slen, errors, final, errorhandler, ud_handler): -# w = DecodeWrapper(errorhandler) -# u, pos = runicode.str_decode_unicode_escape(s, slen, errors, final, -# w.handle, -# ud_handler) -# return u.encode('utf8'), pos, len(u), _get_flag(u) - -def setup_new_encoders_legacy(encoding): - encoder_name = 'utf8_encode_' + encoding - encoder_call_name = 'unicode_encode_' + encoding - decoder_name = 'str_decode_' + encoding - def encoder(utf8, utf8len, errors, errorhandler): - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return getattr(runicode, encoder_call_name)(u, len(u), errors, - w.handle) - def decoder(s, slen, errors, final, errorhandler): - w = DecodeWrapper((errorhandler)) - u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) - encoder.__name__ = encoder_name - decoder.__name__ = decoder_name - if encoder_name not in globals(): - globals()[encoder_name] = encoder - if decoder_name not in globals(): - globals()[decoder_name] = decoder - -def setup(): - for encoding in ['raw_unicode_escape', - 'utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', - 'utf_32_be', 'latin_1', 'unicode_internal']: - setup_new_encoders_legacy(encoding) - -setup() - -def utf8_encode_ascii(utf8, errors, errorhandler): - """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" - or an obscure errorhandler - """ - res = StringBuilder() - i = 0 - pos = 0 - while i < len(utf8): - ch = rutf8.codepoint_at_pos(utf8, i) - if ch >= 0x7F: - msg = "ordinal not in range(128)" - r, newpos = errorhandler(errors, 'ascii', msg, utf8, - pos, pos + 1) - for _ in range(newpos - pos): - i = rutf8.next_codepoint_pos(utf8, i) - pos = newpos - res.append(r) - else: - res.append(chr(ch)) - i = rutf8.next_codepoint_pos(utf8, i) - pos += 1 - - s = res.build() - return s - -# some irregular interfaces -def str_decode_utf8(s, slen, errors, final, errorhandler): - xxxx - - u, pos = runicode.str_decode_utf_8_impl(s, slen, errors, final, w.handle, - runicode.allow_surrogate_by_default) - return u.encode('utf8'), pos, len(u), _get_flag(u) + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag # ____________________________________________________________ # utf-7 @@ -660,7 +841,6 @@ base64bits >= 6 or (base64bits > 0 and base64buffer != 0)): msg = "unterminated shift sequence" - xxxx res, pos = errorhandler(errors, 'utf7', msg, s, shiftOutStartPos, pos) reslen, resflags = rutf8.check_utf8(res, True) outsize += reslen diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -473,7 +473,7 @@ lgt, flag = rutf8.check_utf8(string, allow_surrogates=True) except rutf8.CheckError: res, consumed, lgt, flag = unicodehelper.str_decode_utf8(string, - len(string), errors, final, state.decode_error_handler) + errors, final, state.decode_error_handler) return space.newtuple([space.newutf8(res, lgt, flag), space.newint(consumed)]) else: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -199,7 +199,6 @@ return raise CheckError(res) - @jit.elidable def first_non_ascii_char(s): for i in range(len(s)): From pypy.commits at gmail.com Fri Nov 17 12:06:11 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 09:06:11 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Line numbers start from 1 Message-ID: <5a0f1703.86081c0a.2e97f.1042@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93071:43c4fa3bea61 Date: 2017-11-17 17:05 +0000 http://bitbucket.org/pypy/pypy/changeset/43c4fa3bea61/ Log: Line numbers start from 1 diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py --- a/pypy/interpreter/pyparser/error.py +++ b/pypy/interpreter/pyparser/error.py @@ -22,7 +22,7 @@ """(filename, lineno): try: with open(filename) as f: - for _ in range(lineno): + for _ in range(lineno - 1): f.readline() return f.readline() except: # we can't allow any exceptions here! 
From pypy.commits at gmail.com Fri Nov 17 12:33:13 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 09:33:13 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix did test to match PyPy bytecode Message-ID: <5a0f1d59.4a981c0a.528ef.0735@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93072:4f9bca50104e Date: 2017-11-17 17:32 +0000 http://bitbucket.org/pypy/pypy/changeset/4f9bca50104e/ Log: Fix did test to match PyPy bytecode diff --git a/lib-python/3/test/test_dis.py b/lib-python/3/test/test_dis.py --- a/lib-python/3/test/test_dis.py +++ b/lib-python/3/test/test_dis.py @@ -147,23 +147,24 @@ pass dis_bug1333982 = """\ -%3d 0 LOAD_CONST 1 (0) - 3 POP_JUMP_IF_TRUE 35 - 6 LOAD_GLOBAL 0 (AssertionError) - 9 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) - 12 LOAD_CONST 3 ('bug1333982..') - 15 MAKE_FUNCTION 0 - 18 LOAD_FAST 0 (x) - 21 GET_ITER - 22 CALL_FUNCTION 1 (1 positional, 0 keyword pair) +%3d 0 JUMP_IF_NOT_DEBUG 35 (to 38) + 3 LOAD_CONST 1 (0) + 6 POP_JUMP_IF_TRUE 38 + 9 LOAD_GLOBAL 0 (AssertionError) + 12 LOAD_CONST 2 ( at 0x..., file "%s", line %d>) + 15 LOAD_CONST 3 ('bug1333982..') + 18 MAKE_FUNCTION 0 + 21 LOAD_FAST 0 (x) + 24 GET_ITER + 25 CALL_FUNCTION 1 (1 positional, 0 keyword pair) -%3d 25 LOAD_CONST 4 (1) - 28 BINARY_ADD - 29 CALL_FUNCTION 1 (1 positional, 0 keyword pair) - 32 RAISE_VARARGS 1 +%3d 28 LOAD_CONST 4 (1) + 31 BINARY_ADD + 32 CALL_FUNCTION 1 (1 positional, 0 keyword pair) + 35 RAISE_VARARGS 1 -%3d >> 35 LOAD_CONST 0 (None) - 38 RETURN_VALUE +%3d >> 38 LOAD_CONST 0 (None) + 41 RETURN_VALUE """ % (bug1333982.__code__.co_firstlineno + 1, __file__, bug1333982.__code__.co_firstlineno + 1, From pypy.commits at gmail.com Fri Nov 17 14:24:41 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 11:24:41 -0800 (PST) Subject: [pypy-commit] pypy py3.5: add comment Message-ID: <5a0f3779.d2addf0a.128b0.3239@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93073:6790b83265fa Date: 2017-11-17 19:18 +0000 http://bitbucket.org/pypy/pypy/changeset/6790b83265fa/ Log: add comment diff --git a/lib-python/3/test/test_dis.py b/lib-python/3/test/test_dis.py --- a/lib-python/3/test/test_dis.py +++ b/lib-python/3/test/test_dis.py @@ -146,6 +146,7 @@ 1) pass +# PyPy change: JUMP_IF_NOT_DEBUG dis_bug1333982 = """\ %3d 0 JUMP_IF_NOT_DEBUG 35 (to 38) 3 LOAD_CONST 1 (0) From pypy.commits at gmail.com Fri Nov 17 14:24:43 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 17 Nov 2017 11:24:43 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix doctest to work on builtin functions and methods Message-ID: <5a0f377b.8c6f1c0a.18891.7a0f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93074:ab4627e038e3 Date: 2017-11-17 19:22 +0000 http://bitbucket.org/pypy/pypy/changeset/ab4627e038e3/ Log: Fix doctest to work on builtin functions and methods diff --git a/lib-python/3/doctest.py b/lib-python/3/doctest.py --- a/lib-python/3/doctest.py +++ b/lib-python/3/doctest.py @@ -939,6 +939,8 @@ elif inspect.getmodule(object) is not None: return module is inspect.getmodule(object) elif inspect.isfunction(object): + if isinstance(object.__code__, inspect._builtin_code_type): + return True # XXX: A PyPy builtin - no way to tell return module.__dict__ is object.__globals__ elif inspect.ismethoddescriptor(object): if hasattr(object, '__objclass__'): diff --git a/lib-python/3/test/test_doctest.py b/lib-python/3/test/test_doctest.py --- a/lib-python/3/test/test_doctest.py +++ b/lib-python/3/test/test_doctest.py @@ -660,7 +660,7 @@ >>> import builtins >>> 
tests = doctest.DocTestFinder().find(builtins) - >>> lo, hi = (120, 140) if is_pypy else (790, 810) + >>> lo, hi = (420, 440) if is_pypy else (790, 810) >>> lo < len(tests) < hi # approximate number of objects with docstrings True >>> real_tests = [t for t in tests if len(t.examples) > 0] From pypy.commits at gmail.com Fri Nov 17 17:54:03 2017 From: pypy.commits at gmail.com (gabr...@ec2-54-146-239-158.compute-1.amazonaws.com) Date: Fri, 17 Nov 2017 14:54:03 -0800 (PST) Subject: [pypy-commit] pypy default: Declare _PyLong_FromByteArray space parameter as const. Message-ID: <5a0f688b.21b9df0a.57c2e.4f99@mx.google.com> Author: gabrielg at ec2-54-146-239-158.compute-1.amazonaws.com Branch: Changeset: r93075:d929dd0ac8bc Date: 2017-11-17 13:04 -0300 http://bitbucket.org/pypy/pypy/changeset/d929dd0ac8bc/ Log: Declare _PyLong_FromByteArray space parameter as const. diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -224,8 +224,9 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -UCHARP = rffi.CArrayPtr(rffi.UCHAR) - at cpython_api([UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) +CONST_UCHARP = lltype.Ptr(lltype.Array(lltype.UChar, hints={'nolength': True, + 'render_as_const': True})) + at cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): little_endian = rffi.cast(lltype.Signed, little_endian) signed = rffi.cast(lltype.Signed, signed) From pypy.commits at gmail.com Sat Nov 18 04:57:13 2017 From: pypy.commits at gmail.com (arigo) Date: Sat, 18 Nov 2017 01:57:13 -0800 (PST) Subject: [pypy-commit] pypy default: Fix d929dd0ac8bc Message-ID: <5a1003f9.cc9cdf0a.db917.040f@mx.google.com> Author: Armin Rigo Branch: Changeset: r93076:4791c8513684 Date: 2017-11-18 10:56 +0100 http://bitbucket.org/pypy/pypy/changeset/4791c8513684/ Log: Fix d929dd0ac8bc diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -224,7 +224,7 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -CONST_UCHARP = lltype.Ptr(lltype.Array(lltype.UChar, hints={'nolength': True, +CONST_UCHARP = lltype.Ptr(lltype.Array(rffi.UCHAR, hints={'nolength': True, 'render_as_const': True})) @cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): From pypy.commits at gmail.com Sat Nov 18 23:33:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:17 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: fix some translation issues Message-ID: <5a11098d.c9b81c0a.6f4b2.ed51@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93077:1497f86a109d Date: 2016-11-23 07:27 +0000 http://bitbucket.org/pypy/pypy/changeset/1497f86a109d/ Log: fix some translation issues diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -384,9 +384,9 @@ w_ob = w_ob.convert_to_object() # if space.isinstance_w(w_ob, space.w_str): - value = self.cast_str(w_ob) + value = float(self.cast_str(w_ob)) elif space.isinstance_w(w_ob, space.w_unicode): - value = self.cast_unicode(w_ob) + value = float(self.cast_unicode(w_ob)) else: value = space.float_w(w_ob) w_cdata = 
cdataobj.W_CDataMem(space, self) diff --git a/pypy/module/math/interp_math.py b/pypy/module/math/interp_math.py --- a/pypy/module/math/interp_math.py +++ b/pypy/module/math/interp_math.py @@ -341,7 +341,7 @@ if partials: hi = partials[-1] j = 0 - lo = 0 + lo = 0.0 for j in range(len(partials) - 2, -1, -1): v = hi y = partials[j] diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -454,8 +454,8 @@ return Float64(self.space).box(self.unbox(v)) # numpy 1.10 compatibility raise oefmt(self.space.w_TypeError, "ufunc casting failure") - - + + class Integer(Primitive): _mixin_ = True @@ -1058,9 +1058,9 @@ def logaddexp2(self, v1, v2): tmp = v1 - v2 if tmp > 0: - return v1 + self.npy_log2_1p(math.pow(2, -tmp)) + return v1 + self.npy_log2_1p(math.pow(2., -tmp)) if tmp <= 0: - return v2 + self.npy_log2_1p(math.pow(2, tmp)) + return v2 + self.npy_log2_1p(math.pow(2., tmp)) else: return v1 + v2 @@ -1179,11 +1179,11 @@ imag_str += 'j' # (0+2j) => 2j - if real == 0 and math.copysign(1, real) == 1: + if real == 0. and math.copysign(1., real) == 1.: return imag_str real_str = str_format(real) - op = '+' if imag >= 0 or rfloat.isnan(imag) else '' + op = '+' if imag >= 0. or rfloat.isnan(imag) else '' return ''.join(['(', real_str, op, imag_str, ')']) def runpack_str(self, space, s, native): @@ -1501,13 +1501,13 @@ return rfloat.NAN, 0 if v[0] == 0.0: if v[1] == 0: - return 0, 0 + return 0., 0 if v[1] > 0: - return 1, 0 - return -1, 0 + return 1., 0 + return -1., 0 if v[0] > 0: - return 1, 0 - return -1, 0 + return 1., 0 + return -1., 0 def fmax(self, v1, v2): if self.ge(v1, v2) or self.isnan(v2): diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py --- a/pypy/objspace/std/complexobject.py +++ b/pypy/objspace/std/complexobject.py @@ -220,7 +220,7 @@ div = math.floor(w_div.realval) w_mod = self.sub( W_ComplexObject(other.realval * div, other.imagval * div)) - return (W_ComplexObject(div, 0), w_mod) + return (W_ComplexObject(div, 0.), w_mod) def pow(self, other): rr, ir = rcomplex.c_pow(self.as_tuple(), other.as_tuple()) diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -336,7 +336,7 @@ raise oefmt(space.w_OverflowError, "too large") else: lsb = max(top_exp, rfloat.DBL_MIN_EXP) - rfloat.DBL_MANT_DIG - value = 0 + value = 0. if exp >= lsb: for j in range(total_digits - 1, -1, -1): value = 16.0 * value + _hex_digit(s, j, co_end, diff --git a/rpython/rlib/rcomplex.py b/rpython/rlib/rcomplex.py --- a/rpython/rlib/rcomplex.py +++ b/rpython/rlib/rcomplex.py @@ -70,11 +70,11 @@ def c_pow(x, y): (r1, i1), (r2, i2) = x, y - if i1 == 0 and i2 == 0 and r1 > 0: + if i1 == 0. and i2 == 0. and r1 > 0.: rr = math.pow(r1, r2) ir = 0. elif r2 == 0.0 and i2 == 0.0: - rr, ir = 1, 0 + rr, ir = 1., 0. elif r1 == 1.0 and i1 == 0.0: rr, ir = (1.0, 0.0) elif r1 == 0.0 and i1 == 0.0: @@ -108,22 +108,22 @@ Method: use symmetries to reduce to the case when x = z.real and y = z.imag are nonnegative. Then the real part of the result is given by - + s = sqrt((x + hypot(x, y))/2) - + and the imaginary part is - + d = (y/2)/s - + If either x or y is very large then there's a risk of overflow in computation of the expression x + hypot(x, y). 
We can avoid this by rewriting the formula for s as: - + s = 2*sqrt(x/8 + hypot(x/8, y/8)) - + This costs us two extra multiplications/divisions, but avoids the overhead of checking for x and y large. - + If both x and y are subnormal then hypot(x, y) may also be subnormal, so will lack full precision. We solve this by rescaling x and y by a sufficiently large power of 2 to ensure that x and y From pypy.commits at gmail.com Sat Nov 18 23:33:20 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:20 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: hg merge default Message-ID: <5a110990.83b91c0a.1d24.4768@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93078:93d764ccc576 Date: 2016-11-24 02:11 +0000 http://bitbucket.org/pypy/pypy/changeset/93d764ccc576/ Log: hg merge default diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py --- a/pypy/interpreter/argument.py +++ b/pypy/interpreter/argument.py @@ -111,7 +111,9 @@ self.keywords = self.keywords + keywords self.keywords_w = self.keywords_w + values_w return + is_dict = False if space.isinstance_w(w_starstararg, space.w_dict): + is_dict = True keys_w = space.unpackiterable(w_starstararg) else: try: @@ -125,7 +127,9 @@ keys_w = space.unpackiterable(w_keys) keywords_w = [None] * len(keys_w) keywords = [None] * len(keys_w) - _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, keywords_w, self.keywords) + _do_combine_starstarargs_wrapped( + space, keys_w, w_starstararg, keywords, keywords_w, self.keywords, + is_dict) self.keyword_names_w = keys_w if self.keywords is None: self.keywords = keywords @@ -355,7 +359,7 @@ key) def _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords, - keywords_w, existingkeywords): + keywords_w, existingkeywords, is_dict): i = 0 for w_key in keys_w: try: @@ -374,7 +378,16 @@ "got multiple values for keyword argument '%s'", key) keywords[i] = key - keywords_w[i] = space.getitem(w_starstararg, w_key) + if is_dict: + # issue 2435: bug-to-bug compatibility with cpython. for a subclass of + # dict, just ignore the __getitem__ and access the underlying dict + # directly + from pypy.objspace.descroperation import dict_getitem + w_descr = dict_getitem(space) + w_value = space.get_and_call_function(w_descr, w_starstararg, w_key) + else: + w_value = space.getitem(w_starstararg, w_key) + keywords_w[i] = w_value i += 1 @jit.look_inside_iff( diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py --- a/pypy/interpreter/test/test_argument.py +++ b/pypy/interpreter/test/test_argument.py @@ -120,6 +120,12 @@ raise OperationError(AttributeError, name) return method(*args) + def lookup_in_type_where(self, cls, name): + return 'hopefully not needed', getattr(cls, name) + + def get_and_call_function(self, w_descr, w_obj, *args): + return w_descr.__get__(w_obj)(*args) + def type(self, obj): class Type: def getname(self, space): @@ -805,3 +811,19 @@ assert str(e) == "myerror" else: assert False, "Expected TypeError" + + def test_dict_subclass_with_weird_getitem(self): + # issue 2435: bug-to-bug compatibility with cpython. 
for a subclass of + # dict, just ignore the __getitem__ and behave like ext_do_call in ceval.c + # which just uses the underlying dict + class d(dict): + def __getitem__(self, key): + return key + + for key in ["foo", u"foo"]: + q = d() + q[key] = "bar" + + def test(**kwargs): + return kwargs + assert test(**q) == {"foo": "bar"} diff --git a/pypy/module/cpyext/dictobject.py b/pypy/module/cpyext/dictobject.py --- a/pypy/module/cpyext/dictobject.py +++ b/pypy/module/cpyext/dictobject.py @@ -137,8 +137,7 @@ """This is the same as PyDict_Merge(a, b, 1) in C, or a.update(b) in Python. Return 0 on success or -1 if an exception was raised. """ - space.call_method(space.w_dict, "update", w_obj, w_other) - return 0 + return PyDict_Merge(space, w_obj, w_other, 1) @cpython_api([PyObject], PyObject) def PyDict_Keys(space, w_obj): diff --git a/pypy/module/cpyext/object.py b/pypy/module/cpyext/object.py --- a/pypy/module/cpyext/object.py +++ b/pypy/module/cpyext/object.py @@ -252,7 +252,10 @@ def PyObject_Format(space, w_obj, w_format_spec): if w_format_spec is None: w_format_spec = space.wrap('') - return space.call_method(w_obj, '__format__', w_format_spec) + w_ret = space.call_method(w_obj, '__format__', w_format_spec) + if space.isinstance_w(w_format_spec, space.w_unicode): + return space.unicode_from_object(w_ret) + return w_ret @cpython_api([PyObject], PyObject) def PyObject_Unicode(space, w_obj): diff --git a/pypy/module/cpyext/test/test_dictobject.py b/pypy/module/cpyext/test/test_dictobject.py --- a/pypy/module/cpyext/test/test_dictobject.py +++ b/pypy/module/cpyext/test/test_dictobject.py @@ -103,6 +103,17 @@ api.PyDict_Update(w_d, w_d2) assert space.unwrap(w_d) == dict(a='b', c='d', e='f') + def test_update_doesnt_accept_list_of_tuples(self, space, api): + w_d = space.newdict() + space.setitem(w_d, space.wrap("a"), space.wrap("b")) + + w_d2 = space.wrap([("c", "d"), ("e", "f")]) + + api.PyDict_Update(w_d, w_d2) + assert api.PyErr_Occurred() is space.w_AttributeError + api.PyErr_Clear() + assert space.unwrap(w_d) == dict(a='b') # unchanged + def test_iter(self, space, api): w_dict = space.sys.getdict(space) py_dict = make_ref(space, w_dict) @@ -199,3 +210,18 @@ """), ]) assert module.dict_proxy({'a': 1, 'b': 2}) == 2 + + def test_update(self): + module = self.import_extension('foo', [ + ("update", "METH_VARARGS", + ''' + if (PyDict_Update(PyTuple_GetItem(args, 0), PyTuple_GetItem(args, 1))) + return NULL; + Py_RETURN_NONE; + ''')]) + d = {"a": 1} + module.update(d, {"c": 2}) + assert d == dict(a=1, c=2) + d = {"a": 1} + raises(AttributeError, module.update, d, [("c", 2)]) + diff --git a/pypy/module/cpyext/test/test_object.py b/pypy/module/cpyext/test/test_object.py --- a/pypy/module/cpyext/test/test_object.py +++ b/pypy/module/cpyext/test/test_object.py @@ -312,6 +312,16 @@ assert isinstance(dict(), collections.Mapping) assert module.ismapping(dict()) + def test_format_returns_unicode(self): + module = self.import_extension('foo', [ + ("empty_format", "METH_O", + """ + PyObject* empty_unicode = PyUnicode_FromStringAndSize("", 0); + PyObject* obj = PyObject_Format(args, empty_unicode); + return obj; + """)]) + a = module.empty_format('hello') + assert isinstance(a, unicode) class AppTestPyBuffer_FillInfo(AppTestCpythonExtensionBase): """ diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py --- a/pypy/module/cpyext/test/test_typeobject.py +++ b/pypy/module/cpyext/test/test_typeobject.py @@ -331,12 +331,34 @@ PyHeapTypeObject *heaptype = 
(PyHeapTypeObject *)args; Py_INCREF(heaptype->ht_name); return heaptype->ht_name; + '''), + ("setattr", "METH_O", ''' - ) + int ret; + PyObject* name = PyString_FromString("mymodule"); + PyObject *obj = PyType_Type.tp_alloc(&PyType_Type, 0); + PyHeapTypeObject *type = (PyHeapTypeObject*)obj; + if ((type->ht_type.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0) + { + PyErr_SetString(PyExc_ValueError, + "Py_TPFLAGS_HEAPTYPE not set"); + return NULL; + } + type->ht_type.tp_name = ((PyTypeObject*)args)->tp_name; + PyType_Ready(&type->ht_type); + ret = PyObject_SetAttrString((PyObject*)&type->ht_type, + "__module__", name); + Py_DECREF(name); + if (ret < 0) + return NULL; + return PyLong_FromLong(ret); + '''), ]) class C(object): pass assert module.name_by_heaptype(C) == "C" + assert module.setattr(C) == 0 + def test_type_dict(self): foo = self.import_module("foo") diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -469,7 +469,7 @@ W_TypeObject.__init__(self, space, name, bases_w or [space.w_object], dict_w, force_new_layout=new_layout) self.flag_cpytype = True - self.flag_heaptype = False + self.flag_heaptype = pto.c_tp_flags & Py_TPFLAGS_HEAPTYPE # if a sequence or a mapping, then set the flag to force it if pto.c_tp_as_sequence and pto.c_tp_as_sequence.c_sq_item: self.flag_map_or_seq = 'S' @@ -852,14 +852,14 @@ w_obj = space.allocate_instance(W_PyCTypeObject, w_metatype) track_reference(space, py_obj, w_obj) # __init__ wraps all slotdefs functions from py_type via add_operators - w_obj.__init__(space, py_type) + w_obj.__init__(space, py_type) w_obj.ready() finish_type_2(space, py_type, w_obj) base = py_type.c_tp_base if base: # XXX refactor - parts of this are done in finish_type_2 -> inherit_slots - if not py_type.c_tp_as_number: + if not py_type.c_tp_as_number: py_type.c_tp_as_number = base.c_tp_as_number py_type.c_tp_flags |= base.c_tp_flags & Py_TPFLAGS_CHECKTYPES py_type.c_tp_flags |= base.c_tp_flags & Py_TPFLAGS_HAVE_INPLACEOPS diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py --- a/pypy/objspace/descroperation.py +++ b/pypy/objspace/descroperation.py @@ -61,16 +61,24 @@ @specialize.memo() def str_getitem(space): "Utility that returns the app-level descriptor str.__getitem__." - w_src, w_iter = space.lookup_in_type_where(space.w_str, - '__getitem__') - return w_iter + w_src, w_getitem = space.lookup_in_type_where(space.w_str, + '__getitem__') + return w_getitem @specialize.memo() def unicode_getitem(space): "Utility that returns the app-level descriptor unicode.__getitem__." - w_src, w_iter = space.lookup_in_type_where(space.w_unicode, - '__getitem__') - return w_iter + w_src, w_getitem = space.lookup_in_type_where(space.w_unicode, + '__getitem__') + return w_getitem + + at specialize.memo() +def dict_getitem(space): + "Utility that returns the app-level descriptor dict.__getitem__." + w_src, w_getitem = space.lookup_in_type_where(space.w_dict, + '__getitem__') + return w_getitem + def raiseattrerror(space, w_obj, name, w_descr=None): if w_descr is None: diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -22,7 +22,8 @@ """Block annotator for RPython. 
See description in doc/translation.txt.""" - def __init__(self, translator=None, policy=None, bookkeeper=None): + def __init__(self, translator=None, policy=None, bookkeeper=None, + keepgoing=False): import rpython.rtyper.extfuncregistry # has side effects if translator is None: @@ -50,6 +51,9 @@ if bookkeeper is None: bookkeeper = Bookkeeper(self) self.bookkeeper = bookkeeper + self.keepgoing = keepgoing + self.failed_blocks = set() + self.errors = [] # temporary feature flag, see config.translation.brokentypes # defaults to True in real translations self.allow_bad_unions = False @@ -206,6 +210,12 @@ else: newgraphs = self.translator.graphs #all of them got_blocked_blocks = False in self.annotated.values() + if self.failed_blocks: + text = ('Annotation failed, %s errors were recorded:' % + len(self.errors)) + text += '\n-----'.join(str(e) for e in self.errors) + raise annmodel.AnnotatorError(text) + if got_blocked_blocks: for graph in self.blocked_graphs.values(): self.blocked_graphs[graph] = True @@ -352,6 +362,8 @@ #print '* processblock', block, cells self.annotated[block] = graph + if block in self.failed_blocks: + return if block in self.blocked_blocks: del self.blocked_blocks[block] try: @@ -396,6 +408,10 @@ except annmodel.UnionError as e: # Add source code to the UnionError e.source = '\n'.join(source_lines(graph, block, None, long=True)) + if self.keepgoing: + self.errors.append(e) + self.failed_blocks.add(block) + return raise # if the merged cells changed, we must redo the analysis if unions != oldcells: @@ -486,6 +502,10 @@ except annmodel.AnnotatorError as e: # note that UnionError is a subclass e.source = gather_error(self, graph, block, i) + if self.keepgoing: + self.errors.append(e) + self.failed_blocks.add(block) + return raise else: diff --git a/rpython/config/translationoption.py b/rpython/config/translationoption.py --- a/rpython/config/translationoption.py +++ b/rpython/config/translationoption.py @@ -193,6 +193,10 @@ "When true, enable the use of tagged pointers. 
" "If false, use normal boxing", default=False), + BoolOption("keepgoing", + "Continue annotating when errors are encountered, and report " + "them all at the end of the annotation phase", + default=False, cmdline="--keepgoing"), BoolOption("lldebug", "If true, makes an lldebug build", default=False, cmdline="--lldebug"), diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -399,9 +399,9 @@ def optimize_INT_EQ(self, op): arg0 = self.get_box_replacement(op.getarg(0)) + b1 = self.getintbound(arg0) arg1 = self.get_box_replacement(op.getarg(1)) - b1 = self.getintbound(op.getarg(0)) - b2 = self.getintbound(op.getarg(1)) + b2 = self.getintbound(arg1) if b1.known_gt(b2): self.make_constant_int(op, 0) elif b1.known_lt(b2): diff --git a/rpython/rlib/objectmodel.py b/rpython/rlib/objectmodel.py --- a/rpython/rlib/objectmodel.py +++ b/rpython/rlib/objectmodel.py @@ -323,6 +323,8 @@ def register_replacement_for(replaced_function, sandboxed_name=None): def wrap(func): from rpython.rtyper.extregistry import ExtRegistryEntry + # to support calling func directly + func._sandbox_external_name = sandboxed_name class ExtRegistry(ExtRegistryEntry): _about_ = replaced_function def compute_annotation(self): diff --git a/rpython/rlib/rfloat.py b/rpython/rlib/rfloat.py --- a/rpython/rlib/rfloat.py +++ b/rpython/rlib/rfloat.py @@ -1,6 +1,7 @@ """Float constants""" import math, struct +from math import isinf, isnan, copysign, acosh, asinh, atanh, log1p, expm1 from rpython.annotator.model import SomeString, SomeChar from rpython.rlib import objectmodel, unroll @@ -184,104 +185,6 @@ INFINITY = 1e200 * 1e200 NAN = abs(INFINITY / INFINITY) # bah, INF/INF gives us -NAN? -try: - # Try to get math functions added in 2.6. - from math import isinf, isnan, copysign, acosh, asinh, atanh, log1p -except ImportError: - @not_rpython - def isinf(x): - return x == INFINITY or x == -INFINITY - - @not_rpython - def isnan(v): - return v != v - - @not_rpython - def copysign(x, y): - """Return x with the sign of y""" - if x < 0.: - x = -x - if y > 0. or (y == 0. and math.atan2(y, -1.) > 0.): - return x - else: - return -x - - _2_to_m28 = 3.7252902984619141E-09; # 2**-28 - _2_to_p28 = 268435456.0; # 2**28 - _ln2 = 6.93147180559945286227E-01 - - @not_rpython - def acosh(x): - if isnan(x): - return NAN - if x < 1.: - raise ValueError("math domain error") - if x >= _2_to_p28: - if isinf(x): - return x - else: - return math.log(x) + _ln2 - if x == 1.: - return 0. - if x >= 2.: - t = x * x - return math.log(2. * x - 1. / (x + math.sqrt(t - 1.0))) - t = x - 1.0 - return log1p(t + math.sqrt(2. * t + t * t)) - - @not_rpython - def asinh(x): - absx = abs(x) - if not isfinite(x): - return x - if absx < _2_to_m28: - return x - if absx > _2_to_p28: - w = math.log(absx) + _ln2 - elif absx > 2.: - w = math.log(2. * absx + 1. / (math.sqrt(x * x + 1.) + absx)) - else: - t = x * x - w = log1p(absx + t / (1. + math.sqrt(1. + t))) - return copysign(w, x) - - @not_rpython - def atanh(x): - if isnan(x): - return x - absx = abs(x) - if absx >= 1.: - raise ValueError("math domain error") - if absx < _2_to_m28: - return x - if absx < .5: - t = absx + absx - t = .5 * log1p(t + t * absx / (1. - absx)) - else: - t = .5 * log1p((absx + absx) / (1. - absx)) - return copysign(t, x) - - @not_rpython - def log1p(x): - if abs(x) < DBL_EPSILON // 2.: - return x - elif -.5 <= x <= 1.: - y = 1. 
+ x - return math.log(y) - ((y - 1.) - x) / y - else: - return math.log(1. + x) - -try: - from math import expm1 # Added in Python 2.7. -except ImportError: - @not_rpython - def expm1(x): - if abs(x) < .7: - u = math.exp(x) - if u == 1.: - return x - return (u - 1.) * x / math.log(u) - return math.exp(x) - 1. def log2(x): # Uses an algorithm that should: diff --git a/rpython/translator/sandbox/test/test_sandbox.py b/rpython/translator/sandbox/test/test_sandbox.py --- a/rpython/translator/sandbox/test/test_sandbox.py +++ b/rpython/translator/sandbox/test/test_sandbox.py @@ -65,6 +65,24 @@ f.close() assert tail == "" +def test_open_dup_rposix(): + from rpython.rlib import rposix + def entry_point(argv): + fd = rposix.open("/tmp/foobar", os.O_RDONLY, 0777) + assert fd == 77 + fd2 = rposix.dup(fd) + assert fd2 == 78 + return 0 + + exe = compile(entry_point) + g, f = run_in_subprocess(exe) + expect(f, g, "ll_os.ll_os_open", ("/tmp/foobar", os.O_RDONLY, 0777), 77) + expect(f, g, "ll_os.ll_os_dup", (77, True), 78) + g.close() + tail = f.read() + f.close() + assert tail == "" + def test_read_write(): def entry_point(argv): fd = os.open("/tmp/foobar", os.O_RDONLY, 0777) diff --git a/rpython/translator/translator.py b/rpython/translator/translator.py --- a/rpython/translator/translator.py +++ b/rpython/translator/translator.py @@ -67,7 +67,8 @@ if self.annotator is not None: raise ValueError("we already have an annotator") from rpython.annotator.annrpython import RPythonAnnotator - self.annotator = RPythonAnnotator(self, policy=policy) + self.annotator = RPythonAnnotator( + self, policy=policy, keepgoing=self.config.translation.keepgoing) self.annotator.allow_bad_unions = self.config.translation.brokentypes return self.annotator From pypy.commits at gmail.com Sat Nov 18 23:33:22 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:33:22 -0800 (PST) Subject: [pypy-commit] pypy fix-broken-types: translation fixes Message-ID: <5a110992.31a9df0a.13a2b.796c@mx.google.com> Author: Ronan Lamy Branch: fix-broken-types Changeset: r93079:839732d2f9d2 Date: 2016-11-24 02:25 +0000 http://bitbucket.org/pypy/pypy/changeset/839732d2f9d2/ Log: translation fixes diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py --- a/pypy/module/micronumpy/types.py +++ b/pypy/module/micronumpy/types.py @@ -894,7 +894,7 @@ @simple_unary_op def exp2(self, v): try: - return math.pow(2, v) + return math.pow(2., v) except OverflowError: return rfloat.INFINITY @@ -1587,7 +1587,7 @@ @complex_unary_op def exp2(self, v): try: - return rcomplex.c_pow((2,0), v) + return rcomplex.c_pow((2., 0.), v) except OverflowError: return rfloat.INFINITY, rfloat.NAN except ValueError: @@ -1752,7 +1752,7 @@ try: return rcomplex.c_log(v[0] + 1, v[1]) except OverflowError: - return -rfloat.INFINITY, 0 + return -rfloat.INFINITY, 0. 
except ValueError: return rfloat.NAN, rfloat.NAN diff --git a/pypy/objspace/std/floatobject.py b/pypy/objspace/std/floatobject.py --- a/pypy/objspace/std/floatobject.py +++ b/pypy/objspace/std/floatobject.py @@ -365,7 +365,7 @@ value += 2 * half_eps mant_dig = rfloat.DBL_MANT_DIG if (top_exp == rfloat.DBL_MAX_EXP and - value == math.ldexp(2 * half_eps, mant_dig)): + value == math.ldexp(2 * float(half_eps), mant_dig)): raise oefmt(space.w_OverflowError, "too large") value = math.ldexp(value, (exp + 4*key_digit)) while i < length and s[i].isspace(): From pypy.commits at gmail.com Sat Nov 18 23:54:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 18 Nov 2017 20:54:53 -0800 (PST) Subject: [pypy-commit] pypy default: Fix yet another misuse of py.test.skip Message-ID: <5a110e9d.d18d1c0a.1f0fd.0065@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93082:e5808d8c24ff Date: 2017-11-19 04:54 +0000 http://bitbucket.org/pypy/pypy/changeset/e5808d8c24ff/ Log: Fix yet another misuse of py.test.skip diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -2,7 +2,7 @@ from rpython.rtyper.test.test_llinterp import interpret from rpython.rlib.rarithmetic import * from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError -from hypothesis import given, strategies +from hypothesis import given, strategies, assume import sys import py @@ -404,8 +404,11 @@ def test_int_c_div_mod(x, y): assert int_c_div(~x, y) == -(abs(~x) // y) assert int_c_div( x,-y) == -(x // y) - if (x, y) == (sys.maxint, 1): - py.test.skip("would overflow") + + at given(strategies.integers(min_value=0, max_value=sys.maxint), + strategies.integers(min_value=1, max_value=sys.maxint)) +def test_int_c_div_mod_2(x, y): + assume((x, y) != (sys.maxint, 1)) # This case would overflow assert int_c_div(~x,-y) == +(abs(~x) // y) for x1 in [x, ~x]: for y1 in [y, -y]: From pypy.commits at gmail.com Sun Nov 19 04:20:55 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 01:20:55 -0800 (PST) Subject: [pypy-commit] pypy default: Fix this fragile test (it broke because of 167b802baf3b, unsure why) Message-ID: <5a114cf7.49c71c0a.ec657.3c99@mx.google.com> Author: Armin Rigo Branch: Changeset: r93083:eb297be15f48 Date: 2017-11-19 10:20 +0100 http://bitbucket.org/pypy/pypy/changeset/eb297be15f48/ Log: Fix this fragile test (it broke because of 167b802baf3b, unsure why) diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -105,7 +105,7 @@ assert importlock.count == 0 # A new module importhook(space, 're') - assert importlock.count == 9 + assert importlock.count >= 9 # Import it again previous_count = importlock.count importhook(space, 're') From pypy.commits at gmail.com Sun Nov 19 12:38:52 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 19 Nov 2017 09:38:52 -0800 (PST) Subject: [pypy-commit] pypy default: unbreak macos build Message-ID: <5a11c1ac.17361c0a.271b9.0487@mx.google.com> Author: Matti Picus Branch: Changeset: r93084:a306385caebf Date: 2017-11-19 19:37 +0200 http://bitbucket.org/pypy/pypy/changeset/a306385caebf/ Log: unbreak macos build diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -69,7 +69,7 @@ post_include_bits=[], 
compile_extra=compile_extra ) -if sys.platform.startswith('linux'): +if sys.platform != 'win32': eci_kwds['separate_module_files'].append( SHARED.join('vmprof_mt.c'), ) From pypy.commits at gmail.com Sun Nov 19 17:07:37 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 14:07:37 -0800 (PST) Subject: [pypy-commit] pypy default: Follow-up for cb9634421fa2: revert the very general change and instead Message-ID: <5a1200a9.5d87df0a.a0b86.a0fb@mx.google.com> Author: Armin Rigo Branch: Changeset: r93085:d00a16ef468f Date: 2017-11-19 23:04 +0100 http://bitbucket.org/pypy/pypy/changeset/d00a16ef468f/ Log: Follow-up for cb9634421fa2: revert the very general change and instead improve the logic at the point where it matters, with a comment. diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): - return bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.pendingblocks = {} # map {block: graph-containing-it} self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -216,8 +192,15 @@ def complete_pending_blocks(self): while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + # Grab all blocks from 'self.pendingblocks' in a list, and + # walk that list. This prevents a situation where the same + # block is added over and over again to 'self.pendingblocks' + # and the code here would pop that same block from the dict + # over and over again, without ever looking at other blocks. + all_blocks = self.pendingblocks.keys() + for block in all_blocks: + graph = self.pendingblocks.pop(block) + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" From pypy.commits at gmail.com Sun Nov 19 18:33:13 2017 From: pypy.commits at gmail.com (arigo) Date: Sun, 19 Nov 2017 15:33:13 -0800 (PST) Subject: [pypy-commit] pypy default: Tweak: 16% speed increase of PyPy annotation, by avoiding the Message-ID: <5a1214b9.169a1c0a.7c8af.d8dc@mx.google.com> Author: Armin Rigo Branch: Changeset: r93086:60c4fa1b0539 Date: 2017-11-20 00:32 +0100 http://bitbucket.org/pypy/pypy/changeset/60c4fa1b0539/ Log: Tweak: 16% speed increase of PyPy annotation, by avoiding the situation where a block is rescheduled many many times before it is finally resolved (e.g. because resolving it requires (re)flowing through a very long chain of blocks). 
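For illustration only, a minimal, self-contained sketch of the scheduling idea this log entry describes (all names here are invented; the actual annotator change is in the diff that follows): pending work is kept in per-generation buckets, and anything re-queued while processing goes into the next bucket, so one frequently re-added block cannot keep the rest of the queue waiting.

    # hypothetical sketch, not the annotator code shown below
    def drain(buckets, step):
        while any(buckets):
            gen = [i for i, bucket in enumerate(buckets) if bucket][0]
            if len(buckets) == gen + 1:
                buckets.append({})                 # room for re-scheduled work
            current = buckets[gen]
            while current:
                item, state = current.popitem()
                for new_item, new_state in step(item, state):
                    buckets[gen + 1][new_item] = new_state   # next generation only

    if __name__ == '__main__':
        # toy step(): item n re-schedules n - 1 until it reaches 0
        drain([{5: None}], lambda n, _: [(n - 1, None)] if n else [])
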
diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -33,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = {} # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -57,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -188,18 +188,39 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - # Grab all blocks from 'self.pendingblocks' in a list, and - # walk that list. This prevents a situation where the same - # block is added over and over again to 'self.pendingblocks' - # and the code here would pop that same block from the dict - # over and over again, without ever looking at other blocks. 
- all_blocks = self.pendingblocks.keys() - for block in all_blocks: - graph = self.pendingblocks.pop(block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen self.processblock(graph, block) def complete(self): @@ -207,7 +228,7 @@ while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -393,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX From pypy.commits at gmail.com Mon Nov 20 05:14:36 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:14:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start working on more obscure codecs and completely remove hacks that go via UCS4 from unicodehelper. Now unicodehelper no longer uses runicode Message-ID: <5a12ab0c.01a4df0a.acba3.439b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93087:3e5acb0a1e81 Date: 2017-11-20 11:13 +0100 http://bitbucket.org/pypy/pypy/changeset/3e5acb0a1e81/ Log: start working on more obscure codecs and completely remove hacks that go via UCS4 from unicodehelper. 
Now unicodehelper no longer uses runicode diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1,7 +1,9 @@ +import sys + from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize -from rpython.rlib import runicode, rutf8 -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -168,47 +170,6 @@ r = res.build() return r -class DecodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s, pos, endpos) - -class EncodeWrapper(object): - def __init__(self, handler): - self.orig = handler - - def handle(self, errors, encoding, msg, s, pos, endpos): - return self.orig(errors, encoding, msg, s.encode("utf8"), pos, endpos) - -def setup_new_encoders_legacy(encoding): - encoder_name = 'utf8_encode_' + encoding - encoder_call_name = 'unicode_encode_' + encoding - decoder_name = 'str_decode_' + encoding - def encoder(utf8, errors, errorhandler): - u = utf8.decode("utf8") - w = EncodeWrapper(errorhandler) - return getattr(runicode, encoder_call_name)(u, len(u), errors, - w.handle) - def decoder(s, slen, errors, final, errorhandler): - w = DecodeWrapper((errorhandler)) - u, pos = getattr(runicode, decoder_name)(s, slen, errors, final, w.handle) - return u.encode('utf8'), pos, len(u), _get_flag(u) - encoder.__name__ = encoder_name - decoder.__name__ = decoder_name - if encoder_name not in globals(): - globals()[encoder_name] = encoder - if decoder_name not in globals(): - globals()[decoder_name] = decoder - -def setup(): - for encoding in ['utf_16', 'utf_16_le', 'utf_16_be', 'utf_32_le', 'utf_32', - 'utf_32_be', 'unicode_internal']: - setup_new_encoders_legacy(encoding) - -setup() - def utf8_encode_ascii(utf8, errors, errorhandler): """ Don't be confused - this is a slowpath for errors e.g. 
"ignore" or an obscure errorhandler @@ -618,6 +579,41 @@ lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag + +TABLE = '0123456789abcdef' + +def raw_unicode_escape_helper(result, char): + if char >= 0x10000 or char < 0: + result.append("\\U") + zeros = 8 + elif char >= 0x100: + result.append("\\u") + zeros = 4 + else: + result.append("\\x") + zeros = 2 + for i in range(zeros-1, -1, -1): + result.append(TABLE[(char >> (4 * i)) & 0x0f]) + +def utf8_encode_raw_unicode_escape(s, errors, errorhandler=None): + # errorhandler is not used: this function cannot cause Unicode errors + size = len(s) + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + oc = ord(s[pos]) + + if oc < 0x100: + result.append(chr(oc)) + else: + raw_unicode_escape_helper(result, oc) + pos += 1 + + return result.build() + + # ____________________________________________________________ # utf-7 @@ -896,3 +892,395 @@ result.append('-') return result.build() + +# ____________________________________________________________ +# utf-16 + +BYTEORDER = sys.byteorder +BYTEORDER2 = BYTEORDER[0] + 'e' # either "le" or "be" +assert BYTEORDER2 in ('le', 'be') + +def str_decode_utf_16(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "native") + return result, c, lgt, flag + +def str_decode_utf_16_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "big") + return result, c, lgt, flag + +def str_decode_utf_16_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_16_helper(s, errors, final, + errorhandler, "little") + return result, c, lgt, flag + +def str_decode_utf_16_helper(s, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf16'): + size = len(s) + bo = 0 + + if BYTEORDER == 'little': + ihi = 1 + ilo = 0 + else: + ihi = 0 + ilo = 1 + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 2: + bom = (ord(s[ihi]) << 8) | ord(s[ilo]) + if BYTEORDER == 'little': + if bom == 0xFEFF: + pos += 2 + bo = -1 + elif bom == 0xFFFE: + pos += 2 + bo = 1 + else: + if bom == 0xFEFF: + pos += 2 + bo = 1 + elif bom == 0xFFFE: + pos += 2 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + ihi = 1 + ilo = 0 + + elif bo == 1: + # force big endian + ihi = 0 + ilo = 1 + + result = StringBuilder(size // 2) + + #XXX I think the errors are not correctly handled here + while pos < size: + # remaining bytes at the end? 
(size should be even) + if len(s) - pos < 2: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 2: + break + ch = (ord(s[pos + ihi]) << 8) | ord(s[pos + ilo]) + pos += 2 + if ch < 0xD800 or ch > 0xDFFF: + rutf8.unichr_as_utf8_append(result, ch) + continue + # UTF-16 code pair: + if len(s) - pos < 2: + pos -= 2 + if not final: + break + errmsg = "unexpected end of data" + r, pos = errorhandler(errors, public_encoding_name, + errmsg, s, pos, len(s)) + result.append(r) + if len(s) - pos < 2: + break + elif 0xD800 <= ch <= 0xDBFF: + ch2 = (ord(s[pos+ihi]) << 8) | ord(s[pos+ilo]) + pos += 2 + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, ch) + continue + else: + r, pos = errorhandler(errors, public_encoding_name, + "illegal UTF-16 surrogate", + s, pos - 4, pos - 2) + result.append(r) + else: + r, pos = errorhandler(errors, public_encoding_name, + "illegal encoding", + s, pos - 2, pos) + result.append(r) + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return result.build(), pos, lgt, flag, bo + +def _STORECHAR(result, CH, byteorder): + hi = chr(((CH) >> 8) & 0xff) + lo = chr((CH) & 0xff) + if byteorder == 'little': + result.append(lo) + result.append(hi) + else: + result.append(hi) + result.append(lo) + +def unicode_encode_utf_16_helper(s, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf16'): + size = len(s) + if size == 0: + if byteorder == 'native': + result = StringBuilder(2) + _STORECHAR(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 2 + 2) + if byteorder == 'native': + _STORECHAR(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + pos = rutf8.next_codepoint_pos(s, pos) + + if ch < 0xD800: + _STORECHAR(result, ch, byteorder) + elif ch >= 0x10000: + _STORECHAR(result, 0xD800 | ((ch-0x10000) >> 10), byteorder) + _STORECHAR(result, 0xDC00 | ((ch-0x10000) & 0x3FF), byteorder) + elif ch >= 0xE000 or allow_surrogates: + _STORECHAR(result, ch, byteorder) + else: + ru, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + xxx + #if rs is not None: + # # py3k only + # if len(rs) % 2 != 0: + # errorhandler('strict', public_encoding_name, + # 'surrogates not allowed', + # s, pos-1, pos) + # result.append(rs) + # continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + + return result.build() + +def utf8_encode_utf_16(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "native") + +def utf8_encode_utf_16_be(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "big") + +def utf8_encode_utf_16_le(s, errors, + errorhandler=None, + allow_surrogates=True): + return unicode_encode_utf_16_helper(s, errors, errorhandler, + allow_surrogates, "little") + +# ____________________________________________________________ +# utf-32 + +def str_decode_utf_32(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "native") + 
return result, c, lgt, flag + +def str_decode_utf_32_be(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "big") + return result, c, lgt, flag + +def str_decode_utf_32_le(s, errors, final=True, + errorhandler=None): + result, c, lgt, flag, _ = str_decode_utf_32_helper(s, errors, final, + errorhandler, "little") + return result, c, lgt, flag + +BOM32_DIRECT = intmask(0x0000FEFF) +BOM32_REVERSE = intmask(0xFFFE0000) + +def str_decode_utf_32_helper(s, errors, final=True, + errorhandler=None, + byteorder="native", + public_encoding_name='utf32'): + bo = 0 + size = len(s) + + if BYTEORDER == 'little': + iorder = [0, 1, 2, 3] + else: + iorder = [3, 2, 1, 0] + + # Check for BOM marks (U+FEFF) in the input and adjust current + # byte order setting accordingly. In native mode, the leading BOM + # mark is skipped, in all other modes, it is copied to the output + # stream as-is (giving a ZWNBSP character). + pos = 0 + if byteorder == 'native': + if size >= 4: + bom = intmask( + (ord(s[iorder[3]]) << 24) | (ord(s[iorder[2]]) << 16) | + (ord(s[iorder[1]]) << 8) | ord(s[iorder[0]])) + if BYTEORDER == 'little': + if bom == BOM32_DIRECT: + pos += 4 + bo = -1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = 1 + else: + if bom == BOM32_DIRECT: + pos += 4 + bo = 1 + elif bom == BOM32_REVERSE: + pos += 4 + bo = -1 + elif byteorder == 'little': + bo = -1 + else: + bo = 1 + if size == 0: + return u'', 0, bo + if bo == -1: + # force little endian + iorder = [0, 1, 2, 3] + + elif bo == 1: + # force big endian + iorder = [3, 2, 1, 0] + + result = StringBuilder(size // 4) + + while pos < size: + # remaining bytes at the end? (size should be divisible by 4) + if len(s) - pos < 4: + if not final: + break + r, pos = errorhandler(errors, public_encoding_name, + "truncated data", + s, pos, len(s)) + result.append(r) + if len(s) - pos < 4: + break + continue + ch = ((ord(s[pos + iorder[3]]) << 24) | (ord(s[pos + iorder[2]]) << 16) | + (ord(s[pos + iorder[1]]) << 8) | ord(s[pos + iorder[0]])) + if ch >= 0x110000: + r, pos = errorhandler(errors, public_encoding_name, + "codepoint not in range(0x110000)", + s, pos, len(s)) + result.append(r) + continue + + rutf8.unichr_as_utf8_append(result, ch) + pos += 4 + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag, bo + +def _STORECHAR32(result, CH, byteorder): + c0 = chr(((CH) >> 24) & 0xff) + c1 = chr(((CH) >> 16) & 0xff) + c2 = chr(((CH) >> 8) & 0xff) + c3 = chr((CH) & 0xff) + if byteorder == 'little': + result.append(c3) + result.append(c2) + result.append(c1) + result.append(c0) + else: + result.append(c0) + result.append(c1) + result.append(c2) + result.append(c3) + +def unicode_encode_utf_32_helper(s, errors, + errorhandler=None, + allow_surrogates=True, + byteorder='little', + public_encoding_name='utf32'): + size = len(s) + if size == 0: + if byteorder == 'native': + result = StringBuilder(4) + _STORECHAR32(result, 0xFEFF, BYTEORDER) + return result.build() + return "" + + result = StringBuilder(size * 4 + 4) + if byteorder == 'native': + _STORECHAR32(result, 0xFEFF, BYTEORDER) + byteorder = BYTEORDER + + pos = 0 + while pos < size: + ch = rutf8.codepoint_at_pos(s, pos) + pos = rutf8.next_codepoint_pos(s, pos) + ch2 = 0 + if not allow_surrogates and 0xD800 <= ch < 0xE000: + ru, pos = errorhandler(errors, public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + XXX + if rs is not None: + # py3k only + if len(rs) % 4 != 0: + errorhandler('strict', 
public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + result.append(rs) + continue + for ch in ru: + if ord(ch) < 0xD800: + _STORECHAR32(result, ord(ch), byteorder) + else: + errorhandler('strict', public_encoding_name, + 'surrogates not allowed', + s, pos-1, pos) + continue + _STORECHAR32(result, ch, byteorder) + + return result.build() + +def utf8_encode_utf_32(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "native") + +def utf8_encode_utf_32_be(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "big") + +def utf8_encode_utf_32_le(s, errors, + errorhandler=None, allow_surrogates=True): + return unicode_encode_utf_32_helper(s, errors, errorhandler, + allow_surrogates, "little") diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -30,6 +30,10 @@ endpos): """Generic wrapper for calling into error handlers. + Note that error handler receives and returns position into + the unicode characters, not into the position of utf8 bytes, + so it needs to be converted by the codec + Returns (unicode_or_none, str_or_none, newpos) as error handlers may return unicode or on Python 3, bytes. """ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -15,6 +15,7 @@ 'utf-32', 'utf-32-le', 'utf-32-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + print encoding assert unicode(u.encode(encoding),encoding) == u def test_ucs4(self): @@ -115,10 +116,10 @@ raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000}) assert (charmap_decode("\x00\x01\x02", "strict", {0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) assert (charmap_decode("\x00\x01\x02", "strict", {0: u'\U0010FFFF', 1: u'b', 2: u'c'}) == - u"\U0010FFFFbc", 3) + (u"\U0010FFFFbc", 3)) def test_escape_decode_errors(self): from _codecs import escape_decode as decode diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -1194,7 +1194,7 @@ assert False, "always raises" return space.newbytes(s) if ((encoding is None and space.sys.defaultencoding == 'utf8') or - encoding == 'utf-8' or encoding == 'utf8'): + encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'): return space.newbytes(space.utf8_w(w_object)) if w_encoder is None: from pypy.module._codecs.interp_codecs import lookup_codec From pypy.commits at gmail.com Mon Nov 20 05:44:35 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 20 Nov 2017 02:44:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Rename this directory to avoid name conflict with "rutf8.py" Message-ID: <5a12b213.c2311c0a.b2cae.6fe9@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93088:0a3c81c3f67d Date: 2017-11-20 11:44 +0100 http://bitbucket.org/pypy/pypy/changeset/0a3c81c3f67d/ Log: Rename this directory to avoid name conflict with "rutf8.py" diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/fastutf8/capi.py rename from rpython/rlib/rutf8/capi.py rename to rpython/rlib/fastutf8/capi.py diff --git a/rpython/rlib/rutf8/src/utf8-avx.c 
b/rpython/rlib/fastutf8/src/utf8-avx.c rename from rpython/rlib/rutf8/src/utf8-avx.c rename to rpython/rlib/fastutf8/src/utf8-avx.c diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/fastutf8/src/utf8-scalar.c rename from rpython/rlib/rutf8/src/utf8-scalar.c rename to rpython/rlib/fastutf8/src/utf8-scalar.c diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/fastutf8/src/utf8-sse4.c rename from rpython/rlib/rutf8/src/utf8-sse4.c rename to rpython/rlib/fastutf8/src/utf8-sse4.c diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/fastutf8/src/utf8.c rename from rpython/rlib/rutf8/src/utf8.c rename to rpython/rlib/fastutf8/src/utf8.c diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/fastutf8/src/utf8.h rename from rpython/rlib/rutf8/src/utf8.h rename to rpython/rlib/fastutf8/src/utf8.h From pypy.commits at gmail.com Mon Nov 20 05:53:44 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:53:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: in progress Message-ID: <5a12b438.c23a1c0a.17fa4.ddaf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93089:e4a80363506c Date: 2017-11-20 11:52 +0100 http://bitbucket.org/pypy/pypy/changeset/e4a80363506c/ Log: in progress diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -1,30 +1,35 @@ +from hypothesis import given, strategies + +from rpython.rlib import rutf8 + from pypy.interpreter.unicodehelper import str_decode_utf8 from pypy.interpreter.unicodehelper import utf8_encode_ascii, str_decode_ascii +from pypy.interpreter import unicodehelper as uh def decode_utf8(u): return str_decode_utf8(u, True, "strict", None) def test_decode_utf8(): - assert decode_utf8("abc") == ("abc", 3) - assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 1) - assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 1) - assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 1) + assert decode_utf8("abc") == ("abc", 3, 3, rutf8.FLAG_ASCII) + assert decode_utf8("\xe1\x88\xb4") == ("\xe1\x88\xb4", 3, 1, rutf8.FLAG_REGULAR) + assert decode_utf8("\xed\xa0\x80") == ("\xed\xa0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("\xed\xb0\x80") == ("\xed\xb0\x80", 3, 1, rutf8.FLAG_HAS_SURROGATES) assert decode_utf8("\xed\xa0\x80\xed\xb0\x80") == ( - "\xed\xa0\x80\xed\xb0\x80", 2) - assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 1) + "\xed\xa0\x80\xed\xb0\x80", 6, 2, rutf8.FLAG_HAS_SURROGATES) + assert decode_utf8("\xf0\x90\x80\x80") == ("\xf0\x90\x80\x80", 4, 1, rutf8.FLAG_REGULAR) def test_utf8_encode_ascii(): - assert utf8_encode_ascii("abc", 3, "??", "??") == "abc" + assert utf8_encode_ascii("abc", "??", "??") == "abc" def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) return "", end lst = [] input = u"\u1234".encode("utf8") - assert utf8_encode_ascii(input, 1, "??", eh) == "" + assert utf8_encode_ascii(input, "??", eh) == "" assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = u"\u1234\u5678abc\u8765\u4321".encode("utf8") - assert utf8_encode_ascii(input, 7, "??", eh) == "abc" + assert utf8_encode_ascii(input, "??", eh) == "abc" assert lst == [("??", "ascii", input, 0, 2), ("??", "ascii", input, 5, 7)] @@ -46,3 +51,7 @@ ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] + + at given(strategies.binary()) +def test_unicode_raw_escape(s): + 
uh.utf8_encode_raw_unicode_escape(s, 'strict') diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -158,7 +158,7 @@ res.append(chr(oc)) i += 1 else: - r, pos = errorhandler(errors, 'latin1', + r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) res.append(r) @@ -189,7 +189,7 @@ res.append(r) else: res.append(chr(ch)) - i = rutf8.next_codepoint_pos(utf8, i) + i = rutf8.next_codepoint_pos(utf8, i) pos += 1 s = res.build() @@ -318,7 +318,7 @@ assert pos - continuation_bytes >= 0 r = res.build() lgt, flag = rutf8.check_utf8(r, True) - return r, pos - continuation_bytes, lgt, flag + return r, pos, lgt, flag hexdigits = "0123456789ABCDEFabcdef" @@ -362,7 +362,7 @@ flag = rutf8.FLAG_REGULAR pos += digits size = 1 - + return pos, size, flag def str_decode_unicode_escape(s, errors, final, errorhandler, ud_handler): From pypy.commits at gmail.com Mon Nov 20 05:53:46 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 02:53:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a12b43a.c23a1c0a.17fa4.ddb5@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93090:78c8a9571b3d Date: 2017-11-20 11:53 +0100 http://bitbucket.org/pypy/pypy/changeset/78c8a9571b3d/ Log: merge diff --git a/rpython/rlib/rutf8/capi.py b/rpython/rlib/fastutf8/capi.py rename from rpython/rlib/rutf8/capi.py rename to rpython/rlib/fastutf8/capi.py diff --git a/rpython/rlib/rutf8/src/utf8-avx.c b/rpython/rlib/fastutf8/src/utf8-avx.c rename from rpython/rlib/rutf8/src/utf8-avx.c rename to rpython/rlib/fastutf8/src/utf8-avx.c diff --git a/rpython/rlib/rutf8/src/utf8-scalar.c b/rpython/rlib/fastutf8/src/utf8-scalar.c rename from rpython/rlib/rutf8/src/utf8-scalar.c rename to rpython/rlib/fastutf8/src/utf8-scalar.c diff --git a/rpython/rlib/rutf8/src/utf8-sse4.c b/rpython/rlib/fastutf8/src/utf8-sse4.c rename from rpython/rlib/rutf8/src/utf8-sse4.c rename to rpython/rlib/fastutf8/src/utf8-sse4.c diff --git a/rpython/rlib/rutf8/src/utf8.c b/rpython/rlib/fastutf8/src/utf8.c rename from rpython/rlib/rutf8/src/utf8.c rename to rpython/rlib/fastutf8/src/utf8.c diff --git a/rpython/rlib/rutf8/src/utf8.h b/rpython/rlib/fastutf8/src/utf8.h rename from rpython/rlib/rutf8/src/utf8.h rename to rpython/rlib/fastutf8/src/utf8.h From pypy.commits at gmail.com Mon Nov 20 07:57:36 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 04:57:36 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: * Improve ascii/utf8 codecs and unicode escape Message-ID: <5a12d140.08a5df0a.f1c99.7558@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93091:4668380f4c79 Date: 2017-11-20 13:56 +0100 http://bitbucket.org/pypy/pypy/changeset/4668380f4c79/ Log: * Improve ascii/utf8 codecs and unicode escape * Raise instead of looping infinitely when errorhandler returns nonsense diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -33,25 +33,33 @@ assert lst == [("??", "ascii", input, 0, 2), ("??", "ascii", input, 5, 7)] + at given(strategies.text()) +def test_utf8_encode_ascii_2(u): + def eh(errors, encoding, reason, p, start, end): + return "?" 
* (end - start), end + + assert utf8_encode_ascii(u.encode("utf8"), "replace", eh) == u.encode("ascii", "replace") + def test_str_decode_ascii(): - assert str_decode_ascii("abc", 3, "??", True, "??") == ("abc", 3, 3) + assert str_decode_ascii("abc", "??", True, "??") == ("abc", 3, 3, rutf8.FLAG_ASCII) def eh(errors, encoding, reason, p, start, end): lst.append((errors, encoding, p, start, end)) - return u"\u1234\u5678", end + return u"\u1234\u5678".encode("utf8"), end lst = [] input = "\xe8" exp = u"\u1234\u5678".encode("utf8") - assert str_decode_ascii(input, 1, "??", True, eh) == (exp, 1, 2) + assert str_decode_ascii(input, "??", True, eh) == (exp, 1, 2, rutf8.FLAG_REGULAR) assert lst == [("??", "ascii", input, 0, 1)] lst = [] input = "\xe8\xe9abc\xea\xeb" - assert str_decode_ascii(input, 7, "??", True, eh) == ( - exp + exp + "abc" + exp + exp, 7, 11) + assert str_decode_ascii(input, "??", True, eh) == ( + exp + exp + "abc" + exp + exp, 7, 11, rutf8.FLAG_REGULAR) assert lst == [("??", "ascii", input, 0, 1), ("??", "ascii", input, 1, 2), ("??", "ascii", input, 5, 6), ("??", "ascii", input, 6, 7)] - at given(strategies.binary()) -def test_unicode_raw_escape(s): - uh.utf8_encode_raw_unicode_escape(s, 'strict') + at given(strategies.text()) +def test_unicode_raw_escape(u): + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') + assert r == u.encode("raw-unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -158,6 +158,7 @@ res.append(chr(oc)) i += 1 else: + XXX r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) @@ -179,10 +180,15 @@ pos = 0 while i < len(utf8): ch = rutf8.codepoint_at_pos(utf8, i) - if ch >= 0x7F: + if ch > 0x7F: + endpos = pos + 1 + end_i = rutf8.next_codepoint_pos(utf8, i) + while end_i < len(utf8) and rutf8.codepoint_at_pos(utf8, end_i) > 0x7F: + endpos += 1 + end_i = rutf8.next_codepoint_pos(utf8, end_i) msg = "ordinal not in range(128)" r, newpos = errorhandler(errors, 'ascii', msg, utf8, - pos, pos + 1) + pos, endpos) for _ in range(newpos - pos): i = rutf8.next_codepoint_pos(utf8, i) pos = newpos @@ -603,13 +609,13 @@ result = StringBuilder(size) pos = 0 while pos < size: - oc = ord(s[pos]) + oc = rutf8.codepoint_at_pos(s, pos) if oc < 0x100: result.append(chr(oc)) else: raw_unicode_escape_helper(result, oc) - pos += 1 + pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -71,6 +71,9 @@ raise oefmt(space.w_IndexError, "position %d from error handler out of bounds", newpos) + if newpos < startpos: + raise oefmt(space.w_IndexError, + "position %d from error handler did not progress", newpos) w_replace = space.convert_to_w_unicode(w_replace) return w_replace._utf8, newpos return call_errorhandler From pypy.commits at gmail.com Mon Nov 20 08:20:19 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 05:20:19 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Test and fix for int rbinop overflow to long, also add a deeper test for int_floordiv Message-ID: <5a12d693.4a371c0a.3387f.acb3@mx.google.com> Author: stian Branch: math-improvements Changeset: r93092:e6c9af023bc5 Date: 2017-11-20 14:19 +0100 http://bitbucket.org/pypy/pypy/changeset/e6c9af023bc5/ Log: Test and fix for int rbinop overflow 
to long, also add a deeper test for int_floordiv diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py --- a/pypy/objspace/std/intobject.py +++ b/pypy/objspace/std/intobject.py @@ -589,7 +589,7 @@ try: return func(space, y, x) except OverflowError: - return ovf2long(space, y, x, w_other) + return ovf2long(space, y, x, self) else: return func(space, y, x) diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py --- a/pypy/objspace/std/test/test_intobject.py +++ b/pypy/objspace/std/test/test_intobject.py @@ -613,6 +613,9 @@ assert type(x) is int assert str(x) == "0" + def test_rbinop_overflow(self): + x = int(321) + assert x.__rlshift__(333) == 1422567365923326114875084456308921708325401211889530744784729710809598337369906606315292749899759616L class AppTestIntShortcut(AppTestInt): spaceconfig = {"objspace.std.intshortcut": True} diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -70,6 +70,15 @@ r2 = r.int_floordiv(10) assert r2.tolong() == 100L + for op1 in gen_signs(long_vals): + for op2 in gen_signs(long_vals): + if not op2 or op2 >= (1 << SHIFT) or op2 <= -(1 << SHIFT): + continue + rl_op1 = rbigint.fromlong(op1) + r1 = rl_op1.int_floordiv(op2) + r2 = op1 // op2 + assert r1.tolong() == r2 + assert py.test.raises(ZeroDivisionError, r.int_floordiv, 0) # Error pointed out by Armin Rigo From pypy.commits at gmail.com Mon Nov 20 08:33:28 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 05:33:28 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Add test for overflow with regular binops too, now there should be test for all changes to intobject Message-ID: <5a12d9a8.54d91c0a.cb2a3.8fd0@mx.google.com> Author: stian Branch: math-improvements Changeset: r93093:89a762f37f25 Date: 2017-11-20 14:32 +0100 http://bitbucket.org/pypy/pypy/changeset/89a762f37f25/ Log: Add test for overflow with regular binops too, now there should be test for all changes to intobject diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py --- a/pypy/objspace/std/test/test_intobject.py +++ b/pypy/objspace/std/test/test_intobject.py @@ -613,6 +613,10 @@ assert type(x) is int assert str(x) == "0" + def test_binop_overflow(self): + x = int(2) + assert x.__lshift__(128) == 680564733841876926926749214863536422912L + def test_rbinop_overflow(self): x = int(321) assert x.__rlshift__(333) == 1422567365923326114875084456308921708325401211889530744784729710809598337369906606315292749899759616L From pypy.commits at gmail.com Mon Nov 20 09:02:15 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 06:02:15 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Test for int_pow, test+fix for pow ValueError with third argument as 0 Message-ID: <5a12e067.c23a1c0a.17fa4.1a69@mx.google.com> Author: stian Branch: math-improvements Changeset: r93094:9291ee92df89 Date: 2017-11-20 15:01 +0100 http://bitbucket.org/pypy/pypy/changeset/9291ee92df89/ Log: Test for int_pow, test+fix for pow ValueError with third argument as 0 diff --git a/pypy/objspace/std/test/test_longobject.py b/pypy/objspace/std/test/test_longobject.py --- a/pypy/objspace/std/test/test_longobject.py +++ b/pypy/objspace/std/test/test_longobject.py @@ -192,6 +192,12 @@ assert pow(x, 0L, 1L) == 0L assert pow(-1L, -1L) == -1.0 + def test_int_pow(self): + x = 2L + assert pow(x, 2) == 4L + assert pow(x, 2, 2) == 0L + 
assert pow(x, 2, 3L) == 1L + def test_getnewargs(self): assert 0L .__getnewargs__() == (0L,) assert (-1L) .__getnewargs__() == (-1L,) diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -987,9 +987,7 @@ size_b = UDIGIT_TYPE(b.numdigits()) - if b.sign == 0: - return ONERBIGINT - elif c is not None: + if c is not None: if c.sign == 0: raise ValueError("pow() 3rd argument cannot be 0") @@ -1016,6 +1014,8 @@ # so we only do it when it buys something. if a.sign < 0 or a.numdigits() > c.numdigits(): a = a.mod(c) + elif b.sign == 0: + return ONERBIGINT elif a.sign == 0: return NULLRBIGINT elif size_b == 1: @@ -1124,9 +1124,7 @@ raise ValueError("bigint pow() too negative") assert b >= 0 - if b == 0: - return ONERBIGINT - elif c is not None: + if c is not None: if c.sign == 0: raise ValueError("pow() 3rd argument cannot be 0") @@ -1153,6 +1151,8 @@ # so we only do it when it buys something. if a.sign < 0 or a.numdigits() > c.numdigits(): a = a.mod(c) + elif b == 0: + return ONERBIGINT elif a.sign == 0: return NULLRBIGINT elif b == 1: diff --git a/rpython/rlib/test/test_rbigint.py b/rpython/rlib/test/test_rbigint.py --- a/rpython/rlib/test/test_rbigint.py +++ b/rpython/rlib/test/test_rbigint.py @@ -190,7 +190,12 @@ r4 = pow(op1, op2, 1000) print op1, op2 assert r3.tolong() == r4 - + + def test_pow_raises(self): + r1 = rbigint.fromint(2) + r0 = rbigint.fromint(0) + py.test.raises(ValueError, r1.int_pow, 2, r0) + py.test.raises(ValueError, r1.pow, r1, r0) def test_touint(self): result = r_uint(sys.maxint + 42) rl = rbigint.fromint(sys.maxint).add(rbigint.fromint(42)) From pypy.commits at gmail.com Mon Nov 20 09:08:56 2017 From: pypy.commits at gmail.com (stian) Date: Mon, 20 Nov 2017 06:08:56 -0800 (PST) Subject: [pypy-commit] pypy math-improvements: Typo in comment Message-ID: <5a12e1f8.08a5df0a.f1c99.8cdd@mx.google.com> Author: stian Branch: math-improvements Changeset: r93095:6ba5b9334842 Date: 2017-11-20 15:08 +0100 http://bitbucket.org/pypy/pypy/changeset/6ba5b9334842/ Log: Typo in comment diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py --- a/rpython/rlib/rbigint.py +++ b/rpython/rlib/rbigint.py @@ -2120,7 +2120,7 @@ assert vv >= 0 assert wm1 >= 1 q = vv / wm1 - r = vv % wm1 # This seems to be slightly faster than on widen digits than vv - wm1 * q. + r = vv % wm1 # This seems to be slightly faster on widen digits than vv - wm1 * q. 
vj2 = v.digit(abs(j-2)) while wm2 * q > ((r << SHIFT) | vj2): q -= 1 From pypy.commits at gmail.com Mon Nov 20 09:15:52 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 06:15:52 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fixes Message-ID: <5a12e398.7a86df0a.46775.bd55@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93096:d17afc06eedf Date: 2017-11-20 15:15 +0100 http://bitbucket.org/pypy/pypy/changeset/d17afc06eedf/ Log: fixes diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -63,3 +63,8 @@ def test_unicode_raw_escape(u): r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') assert r == u.encode("raw-unicode-escape") + + at given(strategies.text()) +def test_unicode_escape(u): + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict") + assert r == u.encode("unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -23,13 +23,12 @@ @specialize.memo() def encode_error_handler(space): # Fast version of the "strict" errors handler. - def raise_unicode_exception_encode(errors, encoding, msg, u, u_len, + def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - # XXX fix once we stop using runicode.py - flag = _get_flag(u.decode('utf8')) + u_len, flag = rutf8.check_utf8(utf8) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), - space.newutf8(u, u_len, flag), + space.newutf8(utf8, u_len, flag), space.newint(startingpos), space.newint(endingpos), space.newtext(msg)])) @@ -578,13 +577,15 @@ digits = 4 if s[pos] == 'u' else 8 message = "truncated \\uXXXX" pos += 1 - pos = hexescape(result, s, pos, digits, + pos, _, _ = hexescape(result, s, pos, digits, "rawunicodeescape", errorhandler, message, errors) r = result.build() lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag +_utf8_encode_unicode_escape = rutf8.make_utf8_escape_function() + TABLE = '0123456789abcdef' @@ -620,6 +621,9 @@ return result.build() +def utf8_encode_unicode_escape(s, errors): + return _utf8_encode_unicode_escape(s) + # ____________________________________________________________ # utf-7 From pypy.commits at gmail.com Mon Nov 20 10:11:17 2017 From: pypy.commits at gmail.com (mattip) Date: Mon, 20 Nov 2017 07:11:17 -0800 (PST) Subject: [pypy-commit] pypy default: call register_code to profile functions, cleanup Message-ID: <5a12f095.038b1c0a.348ad.47a8@mx.google.com> Author: Matti Picus Branch: Changeset: r93097:d632e9ca79ae Date: 2017-11-20 17:09 +0200 http://bitbucket.org/pypy/pypy/changeset/d632e9ca79ae/ Log: call register_code to profile functions, cleanup diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, 
but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): From pypy.commits at gmail.com Mon Nov 20 10:32:29 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:32:29 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: silence a warning on OS X Message-ID: <5a12f58d.5d87df0a.a0b86.4f16@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93098:d18dd16d58c7 Date: 2017-11-20 16:31 +0100 http://bitbucket.org/pypy/pypy/changeset/d18dd16d58c7/ Log: silence a warning on OS X diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1867,7 +1867,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): From pypy.commits at gmail.com Mon Nov 20 10:43:40 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:43:40 -0800 (PST) Subject: [pypy-commit] pypy default: silence a warning on OS X Message-ID: <5a12f82c.8b951c0a.cd426.8d23@mx.google.com> Author: fijal Branch: Changeset: r93099:6c9c3791d06a Date: 2017-11-20 16:31 +0100 http://bitbucket.org/pypy/pypy/changeset/6c9c3791d06a/ Log: silence a warning on OS X diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1881,7 +1881,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): From pypy.commits at gmail.com Mon Nov 20 10:43:42 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:43:42 -0800 (PST) Subject: [pypy-commit] pypy default: merge Message-ID: <5a12f82e.cf2f1c0a.40dce.6652@mx.google.com> Author: fijal Branch: Changeset: r93100:21fd35c44d66 Date: 2017-11-20 16:42 +0100 http://bitbucket.org/pypy/pypy/changeset/21fd35c44d66/ Log: merge diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): - return 
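The two "silence a warning on OS X" changesets above and below (r93098 on unicode-utf8, r93099 on default) make the same one-line change: the warning, most likely about calling chroot() without a visible declaration, goes away once the generated C code includes unistd.h, which is the header that declares chroot(). A minimal sketch of the rffi pattern involved, written with the public rffi.llexternal helper rather than rposix's module-local external() wrapper, so treat the names below as illustrative rather than the exact code in rposix.py:

    from rpython.rtyper.lltypesystem import rffi
    from rpython.translator.tool.cbuild import ExternalCompilationInfo

    # unistd.h gives the C compiler a prototype for chroot(), so the
    # generated C source no longer triggers a warning on OS X.
    chroot_eci = ExternalCompilationInfo(includes=['unistd.h'])
    c_chroot = rffi.llexternal('chroot', [rffi.CCHARP], rffi.INT,
                               compilation_info=chroot_eci,
                               save_err=rffi.RFFI_SAVE_ERRNO)

The same ExternalCompilationInfo(includes=[...]) argument can be attached to any llexternal declaration whose C function would otherwise be called without its header in scope.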
bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -81,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -212,19 +188,47 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -410,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ 
@rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): From pypy.commits at gmail.com Mon Nov 20 10:55:02 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 07:55:02 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fixes until we get to formatting problems Message-ID: <5a12fad6.08e31c0a.60024.9008@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93101:f074b4987d57 Date: 2017-11-20 16:54 +0100 http://bitbucket.org/pypy/pypy/changeset/f074b4987d57/ Log: fixes until we get to formatting problems diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1759,20 +1759,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - - @specialize.argtype(1) - def unicode_w(self, w_obj): - return self.utf8_w(w_obj).decode('utf8') - - def realunicode_w(self, w_obj): - return self.realutf8_w(w_obj).decode('utf8') - - def newunicode(self, u): - from pypy.interpreter import unicodehelper - assert isinstance(u, unicode) - # XXX let's disallow that - return self.newutf8(u.encode("utf8"), len(u), unicodehelper._get_flag(u)) - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/interpreter/test/test_unicodehelper.py b/pypy/interpreter/test/test_unicodehelper.py --- a/pypy/interpreter/test/test_unicodehelper.py +++ b/pypy/interpreter/test/test_unicodehelper.py @@ -61,10 +61,10 @@ @given(strategies.text()) def test_unicode_raw_escape(u): - r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict') + r = uh.utf8_encode_raw_unicode_escape(u.encode("utf8"), 'strict', None) assert r == u.encode("raw-unicode-escape") @given(strategies.text()) def test_unicode_escape(u): - r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict") + r = uh.utf8_encode_unicode_escape(u.encode("utf8"), "strict", None) assert r == u.encode("unicode-escape") diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -60,14 +60,12 @@ return True return False -def _get_flag(u): - flag = rutf8.FLAG_ASCII - for c in u: - if 0xD800 <= ord(c) <= 0xDFFF: - return rutf8.FLAG_HAS_SURROGATES - if ord(c) >= 0x80: - flag = rutf8.FLAG_REGULAR - return flag +def get_flag_from_code(oc): + if oc <= 0x7F: + return rutf8.FLAG_ASCII + if 0xD800 <= oc <= 0xDFFF: + return rutf8.FLAG_HAS_SURROGATES + 
return rutf8.FLAG_REGULAR # These functions take and return unwrapped rpython strings def decode_unicode_escape(space, string): @@ -134,7 +132,11 @@ return ress, len(s), lgt, flag def str_decode_latin_1(s, errors, final, errorhandler): - xxx + try: + rutf8.check_ascii(s) + return s, len(s), len(s), rutf8.FLAG_ASCII + except rutf8.CheckError: + return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) def utf8_encode_latin_1(s, errors, errorhandler): try: @@ -208,7 +210,6 @@ slen = len(s) res = StringBuilder(slen) pos = 0 - continuation_bytes = 0 end = len(s) while pos < end: ordch1 = ord(s[pos]) @@ -229,6 +230,7 @@ if ordch1 <= 0xDF: if pos >= end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) @@ -243,7 +245,6 @@ continue # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz pos += 1 - continuation_bytes += 1 res.append(chr(ordch1)) res.append(chr(ordch2)) continue @@ -251,6 +252,7 @@ if ordch1 <= 0xEF: if (pos + 2) > end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos + 1) @@ -272,7 +274,6 @@ pos += 2 # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz - continuation_bytes += 2 res.append(chr(ordch1)) res.append(chr(ordch2)) res.append(chr(ordch3)) @@ -281,6 +282,7 @@ if ordch1 <= 0xF4: if (pos + 3) > end: if not final: + pos -= 1 break r, pos = errorhandler(errors, "utf8", "unexpected end of data", s, pos - 1, pos) @@ -312,15 +314,12 @@ res.append(chr(ordch2)) res.append(chr(ordch3)) res.append(chr(ordch4)) - continuation_bytes += 3 continue r, pos = errorhandler(errors, "utf8", "invalid start byte", s, pos - 1, pos) res.append(r) - assert pos == end - assert pos - continuation_bytes >= 0 r = res.build() lgt, flag = rutf8.check_utf8(r, True) return r, pos, lgt, flag @@ -352,19 +351,14 @@ else: # when we get here, chr is a 32-bit unicode character if chr > 0x10ffff: - UUU message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) + size, flag = rutf8.check_utf8(res) builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) - if chr <= 0x7f: - flag = rutf8.FLAG_ASCII - elif 0xd800 <= chr <= 0xdfff: - flag = rutf8.FLAG_HAS_SURROGATES - else: - flag = rutf8.FLAG_REGULAR + flag = get_flag_from_code(chr) pos += digits size = 1 @@ -508,22 +502,22 @@ builder.append(res) continue pos = look + 1 - XXX - if code <= MAXUNICODE: - builder.append(UNICHR(code)) - else: - code -= 0x10000L - builder.append(unichr(0xD800 + (code >> 10))) - builder.append(unichr(0xDC00 + (code & 0x03FF))) + outsize += 1 + flag = combine_flags(flag, get_flag_from_code(code)) + rutf8.unichr_as_utf8_append(builder, code) else: - YYY res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize builder.append(res) else: - AAA res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) + newsize, newflag = rutf8.check_utf8(res, True) + flag = combine_flags(flag, newflag) + outsize += newsize builder.append(res) else: builder.append('\\') @@ -602,7 +596,7 @@ for i in range(zeros-1, -1, -1): result.append(TABLE[(char >> (4 * i)) & 0x0f]) -def utf8_encode_raw_unicode_escape(s, errors, errorhandler=None): +def utf8_encode_raw_unicode_escape(s, errors, errorhandler): # errorhandler is not used: this function cannot cause Unicode errors size = len(s) if size == 0: @@ -621,7 +615,7 @@ return 
result.build() -def utf8_encode_unicode_escape(s, errors): +def utf8_encode_unicode_escape(s, errors, errorhandler): return _utf8_encode_unicode_escape(s) # ____________________________________________________________ @@ -851,7 +845,7 @@ assert final_length >= 0 return result.build()[:final_length], pos, outsize, flag -def utf8_encode_utf_7(s, errors, errorhandler=None): +def utf8_encode_utf_7(s, errors, errorhandler): size = len(s) if size == 0: return '' @@ -1294,3 +1288,153 @@ errorhandler=None, allow_surrogates=True): return unicode_encode_utf_32_helper(s, errors, errorhandler, allow_surrogates, "little") + +# ____________________________________________________________ +# unicode-internal + +def str_decode_unicode_internal(s, errors, final=False, + errorhandler=None): + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + unicode_bytes = 4 + if BYTEORDER == "little": + start = 0 + stop = unicode_bytes + step = 1 + else: + start = unicode_bytes - 1 + stop = -1 + step = -1 + + result = StringBuilder(size) + pos = 0 + while pos < size: + if pos > size - unicode_bytes: + res, pos = errorhandler(errors, "unicode_internal", + "truncated input", + s, pos, size) + result.append(res) + if pos > size - unicode_bytes: + break + continue + t = r_uint(0) + h = 0 + for j in range(start, stop, step): + t += r_uint(ord(s[pos + j])) << (h*8) + h += 1 + if t > 0x10ffff: + res, pos = errorhandler(errors, "unicode_internal", + "unichr(%d) not in range" % (t,), + s, pos, pos + unicode_bytes) + result.append(res) + continue + rutf8.unichr_as_utf8_append(result, intmask(t)) + pos += unicode_bytes + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag + +def utf8_encode_unicode_internal(s, errors, errorhandler): + size = len(s) + if size == 0: + return '' + + result = StringBuilder(size * 4) + pos = 0 + while pos < size: + oc = rutf8.codepoint_at_pos(s, pos) + if BYTEORDER == "little": + result.append(chr(oc & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 24 & 0xFF)) + else: + result.append(chr(oc >> 24 & 0xFF)) + result.append(chr(oc >> 16 & 0xFF)) + result.append(chr(oc >> 8 & 0xFF)) + result.append(chr(oc & 0xFF)) + pos = rutf8.next_codepoint_pos(s, pos) + + return result.build() + +# ____________________________________________________________ +# Charmap + +ERROR_CHAR = u'\ufffe'.encode('utf8') + + at specialize.argtype(4) +def str_decode_charmap(s, errors, final=False, + errorhandler=None, mapping=None): + "mapping can be a rpython dictionary, or a dict-like object." 
+ + # Default to Latin-1 + if mapping is None: + return str_decode_latin_1(s, errors, final=final, + errorhandler=errorhandler) + size = len(s) + if size == 0: + return '', 0, 0, rutf8.FLAG_ASCII + + pos = 0 + result = StringBuilder(size) + while pos < size: + ch = s[pos] + + c = mapping.get(ch, ERROR_CHAR) + if c == ERROR_CHAR: + r, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, pos + 1) + result.append(r) + continue + result.append(c) + pos += 1 + r = result.build() + lgt, flag = rutf8.check_utf8(r, True) + return r, pos, lgt, flag + +def utf8_encode_charmap(s, errors, errorhandler=None, + mapping=None): + YYY + if mapping is None: + return unicode_encode_latin_1(s, size, errors, + errorhandler=errorhandler) + + if errorhandler is None: + errorhandler = default_unicode_error_encode + + if size == 0: + return '' + result = StringBuilder(size) + pos = 0 + while pos < size: + ch = s[pos] + + c = mapping.get(ch, '') + if len(c) == 0: + # collect all unencodable chars. Important for narrow builds. + collend = pos + 1 + while collend < size and mapping.get(s[collend], '') == '': + collend += 1 + ru, rs, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, collend) + if rs is not None: + # py3k only + result.append(rs) + continue + for ch2 in ru: + c2 = mapping.get(ch2, '') + if len(c2) == 0: + errorhandler( + "strict", "charmap", + "character maps to ", + s, pos, pos + 1) + result.append(c2) + continue + result.append(c) + pos += 1 + return result.build() + diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,7 +1,6 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import we_are_translated, not_rpython from rpython.rlib.rstring import UnicodeBuilder -from rpython.rlib.runicode import code_to_unichr, MAXUNICODE from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -563,14 +562,14 @@ if space.isinstance_w(w_ch, space.w_unicode): # Charmap may return a unicode string - return space.unicode_w(w_ch) + return space.utf8_w(w_ch) elif space.isinstance_w(w_ch, space.w_int): # Charmap may return a number x = space.int_w(w_ch) if not 0 <= x <= 0x10FFFF: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return code_to_unichr(x) + return rutf8.unichr_as_utf8(x) elif space.is_w(w_ch, space.w_None): # Charmap may return None return errorchar @@ -614,12 +613,13 @@ @unwrap_spec(string='bufferstr', errors='text_or_none') def charmap_decode(space, string, errors="strict", w_mapping=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' if len(string) == 0: - return space.newtuple([space.newunicode(u''), space.newint(0)]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + space.newint(0)]) if space.is_none(w_mapping): mapping = None @@ -628,14 +628,14 @@ final = True state = space.fromcache(CodecState) - result, consumed = runicode.str_decode_charmap( - string, len(string), errors, - final, DecodeWrapper(state.decode_error_handler).handle, mapping) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + result, consumed, lgt, flag = unicodehelper.str_decode_charmap( + string, errors, final, state.decode_error_handler, mapping) + return space.newtuple([space.newutf8(result, lgt, flag), 
+ space.newint(consumed)]) @unwrap_spec(utf8='utf8', errors='text_or_none') def charmap_encode(space, utf8, errors="strict", w_mapping=None): - from pypy.interpreter.unicodehelper import EncodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' @@ -645,10 +645,8 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - uni = utf8.decode('utf8') - result = runicode.unicode_encode_charmap( - uni, len(uni), errors, - EncodeWrapper(state.encode_error_handler).handle, mapping) + result = unicodehelper.unicode_encode_charmap( + utf8, errors, state.encode_error_handler, mapping) return space.newtuple([space.newbytes(result), space.newint(len(uni))]) @@ -707,7 +705,7 @@ @unwrap_spec(errors='text_or_none') def unicode_internal_decode(space, w_string, errors="strict"): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter import unicodehelper if errors is None: errors = 'strict' @@ -718,14 +716,16 @@ string = space.readbuf_w(w_string).as_str() if len(string) == 0: - return space.newtuple([space.newunicode(u''), space.newint(0)]) + return space.newtuple([space.newutf8('', 0, rutf8.FLAG_ASCII), + space.newint(0)]) final = True state = space.fromcache(CodecState) - result, consumed = runicode.str_decode_unicode_internal( - string, len(string), errors, - final, DecodeWrapper(state.decode_error_handler).handle) - return space.newtuple([space.newunicode(result), space.newint(consumed)]) + result, consumed, lgt, flag = unicodehelper.str_decode_unicode_internal( + string, errors, + final, state.decode_error_handler) + return space.newtuple([space.newutf8(result, lgt, flag), + space.newint(consumed)]) # ____________________________________________________________ # support for the "string escape" codec diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -15,7 +15,6 @@ 'utf-32', 'utf-32-le', 'utf-32-be', 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): - print encoding assert unicode(u.encode(encoding),encoding) == u def test_ucs4(self): diff --git a/pypy/module/exceptions/interp_exceptions.py b/pypy/module/exceptions/interp_exceptions.py --- a/pypy/module/exceptions/interp_exceptions.py +++ b/pypy/module/exceptions/interp_exceptions.py @@ -285,7 +285,7 @@ def descr_init(self, space, w_object, w_start, w_end, w_reason): # typechecking - space.realunicode_w(w_object) + space.utf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) @@ -719,7 +719,7 @@ def descr_init(self, space, w_encoding, w_object, w_start, w_end, w_reason): # typechecking space.realtext_w(w_encoding) - space.realunicode_w(w_object) # XXX realutf8()? 
+ space.utf8_w(w_object) space.int_w(w_start) space.int_w(w_end) space.realtext_w(w_reason) diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -432,8 +432,7 @@ def fmt_s(self, w_value): space = self.space - got_unicode = space.isinstance_w(w_value, - space.w_unicode) + got_unicode = space.isinstance_w(w_value, space.w_unicode) if not do_unicode: if got_unicode: raise NeedUnicodeFormattingError diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -164,9 +164,9 @@ if isinstance(x, str): return self.newtext(x) if isinstance(x, unicode): - from pypy.interpreter import unicodehelper - return self.newutf8(x.encode('utf8'), len(x), - unicodehelper._get_flag(x)) + x = x.encode('utf8') + lgt, flag = rutf8.check_utf8(x, True) + return self.newutf8(x, lgt, flag) if isinstance(x, float): return W_FloatObject(x) if isinstance(x, W_Root): From pypy.commits at gmail.com Mon Nov 20 13:35:20 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 10:35:20 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Bug-for-bug compatibility (and performance optimisation) in BufferedReader.readinto1() Message-ID: <5a132068.21b9df0a.dcef.7ba2@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93102:feaba8e9bb0a Date: 2017-11-20 18:34 +0000 http://bitbucket.org/pypy/pypy/changeset/feaba8e9bb0a/ Log: Bug-for-bug compatibility (and performance optimisation) in BufferedReader.readinto1() diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -111,14 +111,15 @@ self._unsupportedoperation(space, "detach") def readinto_w(self, space, w_buffer): - return self._readinto(space, w_buffer, "read") + return self._readinto(space, w_buffer, read_once=False) def readinto1_w(self, space, w_buffer): - return self._readinto(space, w_buffer, "read1") + return self._readinto(space, w_buffer, read_once=True) - def _readinto(self, space, w_buffer, methodname): + def _readinto(self, space, w_buffer, read_once): rwbuffer = space.writebuf_w(w_buffer) length = rwbuffer.getlength() + methodname = "read1" if read_once else "read" w_data = space.call_method(self, methodname, space.newint(length)) if not space.isinstance_w(w_data, space.w_bytes): @@ -882,6 +883,52 @@ self._reader_reset_buf() self.state = STATE_OK + def _readinto(self, space, w_buffer, read_once): + rwbuffer = space.writebuf_w(w_buffer) + length = rwbuffer.getlength() + with self.lock: + have = self._readahead() + if have >= length: + rwbuffer.setslice(0, self.buffer[self.pos:self.pos + length]) + return space.newint(length) + written = 0 + if have > 0: + rwbuffer.setslice(0, self.buffer[self.pos:self.read_end]) + written = have + + while written < length: + if self.writable: + self._flush_and_rewind_unlocked(space) + self._reader_reset_buf() + self.pos = 0 + if written + len(self.buffer) < length: + try: + got = self._raw_read(space, rwbuffer, written, length - written) + written += got + except BlockingIOError: + got = 0 + if got == 0: + break + elif read_once and written: + break + else: + try: + have = self._fill_buffer(space) + except BlockingIOError: + have = 0 + if have == 0: + break + endpos = min(have, length - written) + assert endpos >= 0 + rwbuffer.setslice(written, self.buffer[0:endpos]) + written += endpos + self.pos = endpos + if 
read_once: + break + return space.newint(written) + + + W_BufferedReader.typedef = TypeDef( '_io.BufferedReader', W_BufferedIOBase.typedef, __new__ = generic_new_descr(W_BufferedReader), diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -189,6 +189,31 @@ b = bytearray(2) raises(ValueError, bufio.readinto, b) + def test_readinto1(self): + import _io + + class MockIO(_io._IOBase): + def readable(self): + return True + + def readinto(self, buf): + buf[:3] = b"abc" + return 3 + bufio = _io.BufferedReader(MockIO(), buffer_size=5) + buf = bytearray(10) + bufio.read(2) + n = bufio.readinto1(buf) + assert n == 4 + assert buf[:n] == b'cabc' + + # Yes, CPython's observable behavior depends on buffer_size! + bufio = _io.BufferedReader(MockIO(), buffer_size=20) + buf = bytearray(10) + bufio.read(2) + n = bufio.readinto1(buf) + assert n == 1 + assert buf[:n] == b'c' + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Mon Nov 20 13:49:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 10:49:39 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Remove comment: this test is not supposed to fail any more Message-ID: <5a1323c3.8190df0a.bcb98.95d4@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93103:a5d1206f11e4 Date: 2017-11-20 18:49 +0000 http://bitbucket.org/pypy/pypy/changeset/a5d1206f11e4/ Log: Remove comment: this test is not supposed to fail any more diff --git a/lib-python/3/test/test_io.py b/lib-python/3/test/test_io.py --- a/lib-python/3/test/test_io.py +++ b/lib-python/3/test/test_io.py @@ -1169,12 +1169,7 @@ b = bytearray(2*buffer_size) self.assertEqual(bufio.peek(3), b'fgh') self.assertEqual(rawio._reads, 3) - self.assertEqual(bufio.readinto1(b), 6) # fails because of - # an apparent inconsistency in CPython: readinto1(), if the - # buffered amount is smaller, would always issue one raw read() - # call. This differs from read1(), which if the buffered amount - # if smaller (but more than zero), would just return it without - # any raw read() call. In PyPy both have the behavior of read1(). + self.assertEqual(bufio.readinto1(b), 6) self.assertEqual(b[:6], b"fghjkl") self.assertEqual(rawio._reads, 4) From pypy.commits at gmail.com Mon Nov 20 14:06:31 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 11:06:31 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix tests to match PyPy behaviour Message-ID: <5a1327b7.46901c0a.7be1d.a6b8@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93104:5c2561dd0c89 Date: 2017-11-20 19:06 +0000 http://bitbucket.org/pypy/pypy/changeset/5c2561dd0c89/ Log: Fix tests to match PyPy behaviour diff --git a/lib-python/3/test/test_pydoc.py b/lib-python/3/test/test_pydoc.py --- a/lib-python/3/test/test_pydoc.py +++ b/lib-python/3/test/test_pydoc.py @@ -141,7 +141,7 @@  
Modules - +\x20\x20\x20\x20        
builtins

@@ -878,7 +878,7 @@ @requires_docstrings def test_unbound_builtin_method(self): self.assertEqual(self._get_summary_line(pickle.Pickler.dump), - "dump(self, obj, /)") + "dump(self, obj)") # these no longer include "self" def test_bound_python_method(self): @@ -891,13 +891,13 @@ s = StringIO() p = pickle.Pickler(s) self.assertEqual(self._get_summary_line(p.dump), - "dump(obj, /) method of _pickle.Pickler instance") + "dump(obj) method of pickle._Pickler instance") # this should *never* include self! @requires_docstrings def test_module_level_callable(self): self.assertEqual(self._get_summary_line(os.stat), - "stat(path, *, dir_fd=None, follow_symlinks=True)") + "stat(path, *, dir_fd=-100, follow_symlinks=True)") @unittest.skipUnless(threading, 'Threading required for this test.') From pypy.commits at gmail.com Mon Nov 20 14:25:17 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 11:25:17 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Skip tracemalloc tests Message-ID: <5a132c1d.098a1c0a.cf682.6f3d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93105:a84f8ceb8740 Date: 2017-11-20 19:24 +0000 http://bitbucket.org/pypy/pypy/changeset/a84f8ceb8740/ Log: Skip tracemalloc tests diff --git a/lib-python/3/test/test_tracemalloc.py b/lib-python/3/test/test_tracemalloc.py --- a/lib-python/3/test/test_tracemalloc.py +++ b/lib-python/3/test/test_tracemalloc.py @@ -1,7 +1,6 @@ import contextlib import os import sys -import tracemalloc import unittest from unittest.mock import patch from test.support.script_helper import (assert_python_ok, assert_python_failure, @@ -12,6 +11,11 @@ except ImportError: threading = None +try: + import tracemalloc +except ImportError: + raise unittest.SkipTest("tracemalloc is required") + EMPTY_STRING_SIZE = sys.getsizeof(b'') def get_frames(nframe, lineno_delta): From pypy.commits at gmail.com Mon Nov 20 17:06:05 2017 From: pypy.commits at gmail.com (fijal) Date: Mon, 20 Nov 2017 14:06:05 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: work on formatting Message-ID: <5a1351cd.7a86df0a.46775.559e@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93106:b2f3bd9151c0 Date: 2017-11-20 23:05 +0100 http://bitbucket.org/pypy/pypy/changeset/b2f3bd9151c0/ Log: work on formatting diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -25,7 +25,7 @@ # Fast version of the "strict" errors handler. 
def raise_unicode_exception_encode(errors, encoding, msg, utf8, startingpos, endingpos): - u_len, flag = rutf8.check_utf8(utf8) + u_len, flag = rutf8.check_utf8(utf8, True) raise OperationError(space.w_UnicodeEncodeError, space.newtuple([space.newtext(encoding), space.newutf8(utf8, u_len, flag), @@ -60,13 +60,6 @@ return True return False -def get_flag_from_code(oc): - if oc <= 0x7F: - return rutf8.FLAG_ASCII - if 0xD800 <= oc <= 0xDFFF: - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - # These functions take and return unwrapped rpython strings def decode_unicode_escape(space, string): state = space.fromcache(interp_codecs.CodecState) @@ -138,6 +131,24 @@ except rutf8.CheckError: return _str_decode_latin_1_slowpath(s, errors, final, errorhandler) +def _str_decode_latin_1_slowpath(s, errors, final, errorhandler): + res = StringBuilder(len(s)) + i = 0 + while i < len(s): + if ord(s[i]) > 0x7F: + while i < len(s) and ord(s[i]) > 0x7F: + rutf8.unichr_as_utf8_append(res, ord(s[i])) + i += 1 + else: + start = i + end = i + 1 + while end < len(s) and ord(s[end]) <= 0x7F: + end += 1 + res.append_slice(s, start, end) + i = end + # cannot be ASCII, cannot have surrogates, I believe + return res.build(), len(s), len(s), rutf8.FLAG_REGULAR + def utf8_encode_latin_1(s, errors, errorhandler): try: rutf8.check_ascii(s) @@ -159,7 +170,6 @@ res.append(chr(oc)) i += 1 else: - XXX r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) @@ -358,7 +368,7 @@ builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) - flag = get_flag_from_code(chr) + flag = rutf8.get_flag_from_code(chr) pos += digits size = 1 @@ -503,7 +513,7 @@ continue pos = look + 1 outsize += 1 - flag = combine_flags(flag, get_flag_from_code(code)) + flag = combine_flags(flag, rutf8.get_flag_from_code(code)) rutf8.unichr_as_utf8_append(builder, code) else: res, pos = errorhandler(errors, "unicodeescape", diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -189,14 +189,17 @@ return new_bytearray(space, w_bytearraytype, []) def descr_reduce(self, space): + from pypy.interpreter.unicodehelper import str_decode_latin_1 + assert isinstance(self, W_BytearrayObject) w_dict = self.getdict(space) if w_dict is None: w_dict = space.w_None + s, _, lgt, flag = str_decode_latin_1(''.join(self.getdata()), 'strict', + True, None) return space.newtuple([ space.type(self), space.newtuple([ - space.newunicode(''.join(self.getdata()).decode('latin-1')), - space.newtext('latin-1')]), + space.newutf8(s, lgt, flag), space.newtext('latin-1')]), w_dict]) @staticmethod diff --git a/pypy/objspace/std/formatting.py b/pypy/objspace/std/formatting.py --- a/pypy/objspace/std/formatting.py +++ b/pypy/objspace/std/formatting.py @@ -1,11 +1,11 @@ """String formatting routines""" import sys -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rarithmetic import INT_MAX from rpython.rlib.rfloat import DTSF_ALT, formatd, isnan, isinf -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder from rpython.rlib.unroll import unrolling_iterable from rpython.tool.sourcetools import func_with_new_name @@ -153,18 +153,15 @@ # to build two subclasses of the BaseStringFormatter class, # each one getting its own subtle differences and RPython types. 
- if do_unicode: - const = unicode - else: - const = str - class StringFormatter(BaseStringFormatter): def __init__(self, space, fmt, values_w, w_valuedict): BaseStringFormatter.__init__(self, space, values_w, w_valuedict) - self.fmt = fmt # either a string or a unicode + self.fmt = fmt # always a string, if unicode, utf8 encoded def peekchr(self): - # return the 'current' character + # Return the 'current' character. Note that this returns utf8 + # encoded part, but this is ok since we only need one-character + # comparisons try: return self.fmt[self.fmtpos] except IndexError: @@ -201,7 +198,8 @@ if self.w_valuedict is None: raise oefmt(space.w_TypeError, "format requires a mapping") if do_unicode: - w_key = space.newunicode(key) + lgt, flag = rutf8.check_utf8(key, True) + w_key = space.newutf8(key, lgt, flag) else: w_key = space.newbytes(key) return space.getitem(self.w_valuedict, w_key) @@ -287,10 +285,7 @@ @jit.look_inside_iff(lambda self: jit.isconstant(self.fmt)) def format(self): lgt = len(self.fmt) + 4 * len(self.values_w) + 10 - if do_unicode: - result = UnicodeBuilder(lgt) - else: - result = StringBuilder(lgt) + result = StringBuilder(lgt) self.result = result while True: # fast path: consume as many characters as possible @@ -311,7 +306,7 @@ c = self.peekchr() self.forward() if c == '%': - self.std_wp(const('%')) + self.std_wp('%', False) continue if w_value is None: w_value = self.nextinputvalue() @@ -333,22 +328,27 @@ def unknown_fmtchar(self): space = self.space - c = self.fmt[self.fmtpos - 1] - w_s = space.newunicode(c) if do_unicode else space.newbytes(c) + if do_unicode: + cp = rutf8.codepoint_at_pos(self.fmt, self.fmtpos - 1) + flag = rutf8.get_flag_from_code(cp) + w_s = space.newutf8(rutf8.unichr_as_utf8(cp), 1, flag) + else: + cp = ord(self.fmt[self.fmtpos - 1]) + w_s = space.newbytes(chr(cp)) raise oefmt(space.w_ValueError, "unsupported format character %R (%s) at index %d", - w_s, hex(ord(c)), self.fmtpos - 1) + w_s, hex(cp), self.fmtpos - 1) - @specialize.argtype(1) - def std_wp(self, r): + @specialize.arg(2) + def std_wp(self, r, is_string=False): length = len(r) - if do_unicode and isinstance(r, str): + if do_unicode and is_string: # convert string to unicode using the default encoding - r = self.space.unicode_w(self.space.newbytes(r)) + r = self.space.utf8_w(self.space.newbytes(r)) prec = self.prec if prec == -1 and self.width == 0: # fast path - self.result.append(const(r)) + self.result.append(r) return if prec >= 0 and prec < length: length = prec # ignore the end of the string if too long @@ -358,12 +358,12 @@ padding = 0 assert padding >= 0 if not self.f_ljust and padding > 0: - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # add any padding at the left of 'r' padding = 0 result.append_slice(r, 0, length) # add 'r' itself if padding > 0: - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # add any remaining padding at the right def std_wp_number(self, r, prefix=''): @@ -375,10 +375,10 @@ # result.append(), and no startswith() if not f_sign and # not f_blank). 
if self.f_sign and not r.startswith('-'): - result.append(const('+')) + result.append('+') elif self.f_blank and not r.startswith('-'): - result.append(const(' ')) - result.append(const(r)) + result.append(' ') + result.append(r) return # add a '+' or ' ' sign if necessary sign = r.startswith('-') @@ -405,18 +405,18 @@ assert padding >= 0 if padnumber == '>': - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) # pad with spaces on the left if sign: - result.append(const(r[0])) # the sign - result.append(const(prefix)) # the prefix + result.append(r[0]) # the sign + result.append(prefix) # the prefix if padnumber == '0': - result.append_multiple_char(const('0'), padding) + result.append_multiple_char('0', padding) # pad with zeroes - result.append_slice(const(r), int(sign), len(r)) + result.append_slice(r, int(sign), len(r)) # the rest of the number if padnumber == '<': # spaces on the right - result.append_multiple_char(const(' '), padding) + result.append_multiple_char(' ', padding) def string_formatting(self, w_value): space = self.space @@ -425,8 +425,7 @@ raise oefmt(space.w_TypeError, "operand does not support unary str") w_result = space.get_and_call_function(w_impl, w_value) - if space.isinstance_w(w_result, - space.w_unicode): + if space.isinstance_w(w_result, space.w_unicode): raise NeedUnicodeFormattingError return space.bytes_w(w_result) @@ -443,11 +442,11 @@ else: from pypy.objspace.std.unicodeobject import unicode_from_object w_value = unicode_from_object(space, w_value) - s = space.unicode_w(w_value) - self.std_wp(s) + s = space.utf8_w(w_value) + self.std_wp(s, False) def fmt_r(self, w_value): - self.std_wp(self.space.text_w(self.space.repr(w_value))) + self.std_wp(self.space.text_w(self.space.repr(w_value)), True) def fmt_c(self, w_value): self.prec = -1 # just because @@ -456,30 +455,30 @@ s = space.bytes_w(w_value) if len(s) != 1: raise oefmt(space.w_TypeError, "%c requires int or char") - self.std_wp(s) + self.std_wp(s, True) elif space.isinstance_w(w_value, space.w_unicode): if not do_unicode: raise NeedUnicodeFormattingError - ustr = space.unicode_w(w_value) + ustr = space.utf8_w(w_value) if len(ustr) != 1: raise oefmt(space.w_TypeError, "%c requires int or unichar") - self.std_wp(ustr) + self.std_wp(ustr, False) else: n = space.int_w(w_value) if do_unicode: try: - c = unichr(n) + c = rutf8.unichr_as_utf8(n) except ValueError: raise oefmt(space.w_OverflowError, "unicode character code out of range") - self.std_wp(c) + self.std_wp(c, False) else: try: s = chr(n) except ValueError: raise oefmt(space.w_OverflowError, "character code not in range(256)") - self.std_wp(s) + self.std_wp(s, True) return StringFormatter @@ -510,11 +509,12 @@ pass else: return space.newbytes(result) - # XXX for now, this is performance critical - fmt = space.utf8_w(w_fmt).decode("utf8") + fmt = space.utf8_w(w_fmt) formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict) result = formatter.format() - return space.newunicode(result) + # this can force strings, not sure if it's a problem or not + lgt, flag = rutf8.check_utf8(result, True) + return space.newutf8(result, lgt, flag) def mod_format(space, w_format, w_values, do_unicode=False): if space.isinstance_w(w_values, space.w_tuple): diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -4,11 +4,12 @@ import string from pypy.interpreter.error import OperationError, oefmt -from rpython.rlib import 
rstring, runicode, rlocale, rfloat, jit +from rpython.rlib import rstring, runicode, rlocale, rfloat, jit, rutf8 from rpython.rlib.objectmodel import specialize from rpython.rlib.rfloat import copysign, formatd from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.signature import Signature +from pypy.interpreter import unicodehelper @specialize.argtype(1) @@ -50,7 +51,8 @@ if for_unicode: def wrap(self, u): - return self.space.newunicode(u) + lgt, flag = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt, flag) else: def wrap(self, s): return self.space.newbytes(s) @@ -59,7 +61,6 @@ def __init__(self, space, template): self.space = space - self.empty = u"" if self.is_unicode else "" self.template = template def build(self, args): @@ -80,10 +81,7 @@ def _build_string(self, start, end, level): space = self.space - if self.is_unicode: - out = rstring.UnicodeBuilder() - else: - out = rstring.StringBuilder() + out = rstring.StringBuilder() if not level: raise oefmt(space.w_ValueError, "Recursion depth exceeded") level -= 1 @@ -344,7 +342,7 @@ w_conversion]) self.parser_list_w.append(w_entry) self.last_end = end + 1 - return self.empty + return "" # w_obj = self._get_argument(name) if conversion is not None: @@ -352,7 +350,7 @@ if recursive: spec = self._build_string(spec_start, end, level) w_rendered = self.space.format(w_obj, self.wrap(spec)) - unwrapper = "unicode_w" if self.is_unicode else "bytes_w" + unwrapper = "utf8_w" if self.is_unicode else "bytes_w" to_interp = getattr(self.space, unwrapper) return to_interp(w_rendered) @@ -379,8 +377,10 @@ def format_method(space, w_string, args, is_unicode): if is_unicode: template = unicode_template_formatter(space, - space.unicode_w(w_string)) - return space.newunicode(template.build(args)) + space.utf8_w(w_string)) + r = template.build(args) + lgt, flag = rutf8.check_utf8(r, True) + return space.newutf8(r, lgt, flag) else: template = str_template_formatter(space, space.bytes_w(w_string)) return space.newbytes(template.build(args)) @@ -416,7 +416,8 @@ if for_unicode: def wrap(self, u): - return self.space.newunicode(u) + lgt, flag = rutf8.check_utf8(u, True) + return self.space.newutf8(u, lgt, flag) else: def wrap(self, s): return self.space.newbytes(s) @@ -426,7 +427,6 @@ def __init__(self, space, spec): self.space = space - self.empty = u"" if self.is_unicode else "" self.spec = spec def _is_alignment(self, c): @@ -492,8 +492,9 @@ presentation_type = spec[i] if self.is_unicode: try: - the_type = spec[i].encode("ascii")[0] - except UnicodeEncodeError: + rutf8.check_utf8(spec[i], True) + the_type = spec[i][0] + except rutf8.CheckError: raise oefmt(space.w_ValueError, "invalid presentation type") else: @@ -538,8 +539,9 @@ return total def _lit(self, s): + assert len(s) == 1 if self.is_unicode: - return s.decode("latin-1") + return rutf8.unichr_as_utf8(ord(s[0])) else: return s @@ -551,10 +553,7 @@ return builder.build() def _builder(self): - if self.is_unicode: - return rstring.UnicodeBuilder() - else: - return rstring.StringBuilder() + return rstring.StringBuilder() def _unknown_presentation(self, tp): raise oefmt(self.space.w_ValueError, @@ -598,8 +597,8 @@ thousands = "" grouping = "\xFF" # special value to mean 'stop' if self.is_unicode: - self._loc_dec = dec.decode("latin-1") - self._loc_thousands = thousands.decode("latin-1") + self._loc_dec = rutf8.decode_latin_1(dec) + self._loc_thousands = rutf8.decode_latin_1(thousands) else: self._loc_dec = dec self._loc_thousands = thousands @@ -718,7 +717,7 @@ ts = 
self._loc_thousands if need_separator else None self._fill_digits(buf, digits, left, n_chars, n_zeros, ts) buf.reverse() - self._grouped_digits = self.empty.join(buf) + self._grouped_digits = "".join(buf) def _upcase_string(self, s): buf = [] @@ -727,7 +726,7 @@ if ord("a") <= index <= ord("z"): c = chr(index - 32) buf.append(c) - return self.empty.join(buf) + return "".join(buf) def _fill_number(self, spec, num, to_digits, to_prefix, fill_char, @@ -736,10 +735,7 @@ if spec.n_lpadding: out.append_multiple_char(fill_char[0], spec.n_lpadding) if spec.n_sign: - if self.is_unicode: - sign = spec.sign.decode("latin-1") - else: - sign = spec.sign + sign = self._lit(spec.sign) out.append(sign) if spec.n_prefix: pref = num[to_prefix:to_prefix + spec.n_prefix] @@ -783,13 +779,13 @@ raise oefmt(space.w_ValueError, "sign not allowed with 'c' presentation type") value = space.int_w(w_num) - max_char = runicode.MAXUNICODE if self.is_unicode else 0xFF + max_char = 0x10FFFF if self.is_unicode else 0xFF if not (0 <= value <= max_char): raise oefmt(space.w_OverflowError, "%%c arg not in range(%s)", hex(max_char)) if self.is_unicode: - result = runicode.UNICHR(value) + result = rutf8.unichr_as_utf8(value) else: result = chr(value) n_digits = 1 @@ -845,6 +841,7 @@ prefix = "0x" as_str = value.format(LONG_DIGITS[:base], prefix) if self.is_unicode: + XXX return as_str.decode("latin-1") return as_str @@ -852,7 +849,7 @@ if base == 10: s = str(value) if self.is_unicode: - return s.decode("latin-1") + return rutf8.decode_latin_1(s) return s # This part is slow. negative = value < 0 @@ -893,7 +890,7 @@ i -= 1 buf[i] = "-" assert i >= 0 - return self.empty.join(buf[i:]) + return "".join(buf[i:]) def format_int_or_long(self, w_num, kind): space = self.space @@ -975,7 +972,7 @@ have_dec_point, to_remainder = self._parse_number(result, to_number) n_remainder = len(result) - to_remainder if self.is_unicode: - digits = result.decode("latin-1") + digits = rutf8.decode_latin_1(result) else: digits = result spec = self._calc_num_width(0, sign, to_number, n_digits, @@ -1081,8 +1078,8 @@ to_imag_number) if self.is_unicode: - re_num = re_num.decode("latin-1") - im_num = im_num.decode("latin-1") + re_num = rutf8.decode_latin_1(re_num) + im_num = rutf8.decode_latin_1(im_num) #set remainder, in CPython _parse_number sets this #using n_re_digits causes tests to fail @@ -1111,7 +1108,7 @@ self._fill_char = tmp_fill_char #compute L and R padding - stored in self._left_pad and self._right_pad - self._calc_padding(self.empty, re_spec.n_total + im_spec.n_total + 1 + + self._calc_padding("", re_spec.n_total + im_spec.n_total + 1 + add_parens * 2) out = self._builder() @@ -1172,7 +1169,7 @@ @specialize.arg(2) def run_formatter(space, w_format_spec, meth, *args): if space.isinstance_w(w_format_spec, space.w_unicode): - formatter = unicode_formatter(space, space.unicode_w(w_format_spec)) + formatter = unicode_formatter(space, space.utf8_w(w_format_spec)) return getattr(formatter, meth)(*args) else: formatter = str_formatter(space, space.bytes_w(w_format_spec)) diff --git a/pypy/objspace/std/test/test_liststrategies.py b/pypy/objspace/std/test/test_liststrategies.py --- a/pypy/objspace/std/test/test_liststrategies.py +++ b/pypy/objspace/std/test/test_liststrategies.py @@ -600,9 +600,9 @@ def test_unicode(self): l1 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newbytes("zwei")]) assert isinstance(l1.strategy, BytesListStrategy) - l2 = W_ListObject(self.space, [self.space.newunicode(u"eins"), 
self.space.newunicode(u"zwei")]) + l2 = W_ListObject(self.space, [self.space.newutf8("eins", 4, 2), self.space.newutf8("zwei", 4, 2)]) assert isinstance(l2.strategy, UnicodeListStrategy) - l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newunicode(u"zwei")]) + l3 = W_ListObject(self.space, [self.space.newbytes("eins"), self.space.newutf8("zwei", 4, 2)]) assert isinstance(l3.strategy, ObjectListStrategy) def test_listview_bytes(self): @@ -626,7 +626,7 @@ # the same for unicode w_l = self.space.newlist([self.space.wrap(u'a'), self.space.wrap(u'b')]) w_l.getitems = None - assert space.unicode_w(space.call_method(space.wrap(u"c"), "join", w_l)) == u"acb" + assert space.utf8_w(space.call_method(space.wrap(u"c"), "join", w_l)) == "acb" def test_string_join_returns_same_instance(self): space = self.space diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -331,12 +331,11 @@ def descr__format__(self, space, w_format_spec): if not space.isinstance_w(w_format_spec, space.w_unicode): w_format_spec = space.call_function(space.w_unicode, w_format_spec) - spec = space.unicode_w(w_format_spec) + spec = space.utf8_w(w_format_spec) formatter = newformat.unicode_formatter(space, spec) self2 = unicode_from_object(space, self) assert isinstance(self2, W_UnicodeObject) - # XXX - return formatter.format_string(self2._utf8.decode("utf8")) + return formatter.format_string(self2._utf8) def descr_mod(self, space, w_values): return mod_format(space, self, w_values, do_unicode=True) @@ -526,12 +525,12 @@ def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, space.utf8_w(self)) return tformat.formatter_parser() def descr_formatter_field_name_split(self, space): from pypy.objspace.std.newformat import unicode_template_formatter - tformat = unicode_template_formatter(space, space.unicode_w(self)) + tformat = unicode_template_formatter(space, space.utf8_w(self)) return tformat.formatter_field_name_split() def descr_lower(self, space): @@ -1188,8 +1187,7 @@ rutf8.check_ascii(s) except rutf8.CheckError as a: eh = unicodehelper.encode_error_handler(space) - u_len = w_object._len() - eh(None, "ascii", "ordinal not in range(128)", s, u_len, + eh(None, "ascii", "ordinal not in range(128)", s, a.pos, a.pos + 1) assert False, "always raises" return space.newbytes(s) @@ -1260,7 +1258,7 @@ # test_unicode_conversion_with__str__ if w_unicode_method is None: if space.isinstance_w(w_obj, space.w_unicode): - return space.newunicode(space.unicode_w(w_obj)) + return unicodehelper.convert_arg_to_w_unicode(space, w_obj) w_unicode_method = space.lookup(w_obj, "__str__") if w_unicode_method is not None: w_res = space.get_and_call_function(w_unicode_method, w_obj) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -123,6 +123,13 @@ continuation_bytes += 1 return len(s) - continuation_bytes +def get_flag_from_code(oc): + if oc <= 0x7F: + return FLAG_ASCII + if 0xD800 <= oc <= 0xDFFF: + return FLAG_HAS_SURROGATES + return FLAG_REGULAR + def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! 
""" @@ -651,3 +658,30 @@ return unicode_escape #, char_escape_helper +def decode_latin_1(s): + if len(s) == 0: + return s + if len(s) == 1 and ord(s[0]) <= 0x7F: + return s + try: + check_ascii(s) + return s + except CheckError: + return _decode_latin_1_slowpath(s) + +def _decode_latin_1_slowpath(s): + res = StringBuilder(len(s)) + i = 0 + while i < len(s): + if ord(s[i]) > 0x7F: + while i < len(s) and ord(s[i]) > 0x7F: + unichr_as_utf8_append(res, ord(s[i])) + i += 1 + else: + start = i + end = i + 1 + while end < len(s) and ord(s[end]) <= 0x7F: + end += 1 + res.append_slice(s, start, end) + i = end + return res.build() From pypy.commits at gmail.com Mon Nov 20 20:59:53 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 17:59:53 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a138899.968ddf0a.433a7.c954@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93107:3e868c28555c Date: 2017-11-21 01:59 +0000 http://bitbucket.org/pypy/pypy/changeset/3e868c28555c/ Log: hg merge default diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -20,3 +20,9 @@ .. branch: run-extra-tests Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) 
diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -339,17 +368,24 @@ def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) def foo(c): bar(c) # + assert stack() == ['test_f_back'] c = continulet(foo) f1_bar = c.switch() assert f1_bar.f_code.co_name == 'bar' @@ -358,14 +394,20 @@ f3_foo = c.switch() assert f3_foo is f2_foo assert f1_bar.f_back is f3_foo + # def main(): f4_main = c.switch() assert f4_main.f_code.co_name == 'main' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): f5_main2 = c.switch() assert f5_main2.f_code.co_name == 'main2' assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_vmprof/interp_vmprof.py b/pypy/module/_vmprof/interp_vmprof.py --- a/pypy/module/_vmprof/interp_vmprof.py +++ b/pypy/module/_vmprof/interp_vmprof.py @@ -93,8 +93,8 @@ return space.newtext(path) def stop_sampling(space): - return space.newint(rvmprof.stop_sampling(space)) + return space.newint(rvmprof.stop_sampling()) def start_sampling(space): - rvmprof.start_sampling(space) + rvmprof.start_sampling() return space.w_None diff --git a/pypy/module/cpyext/longobject.py b/pypy/module/cpyext/longobject.py --- a/pypy/module/cpyext/longobject.py +++ b/pypy/module/cpyext/longobject.py @@ -237,9 +237,9 @@ assert isinstance(w_long, W_LongObject) return w_long.num.sign -UCHARP = lltype.Ptr(lltype.Array( - rffi.UCHAR, hints={'nolength':True, 'render_as_const':True})) - at cpython_api([UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) +CONST_UCHARP = lltype.Ptr(lltype.Array(rffi.UCHAR, hints={'nolength': True, + 'render_as_const': True})) + at cpython_api([CONST_UCHARP, rffi.SIZE_T, rffi.INT_real, rffi.INT_real], PyObject) def _PyLong_FromByteArray(space, bytes, n, little_endian, signed): little_endian = rffi.cast(lltype.Signed, little_endian) signed = rffi.cast(lltype.Signed, signed) diff --git a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py --- a/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py +++ b/pypy/module/test_lib_pypy/cffi_tests/cffi1/test_recompiler.py @@ -2271,7 +2271,7 @@ char32_t foo_4bytes(char32_t); """) lib = verify(ffi, "test_char16_char32_type" + no_cpp * "_nocpp", """ - #if !defined(__cplusplus) || __cplusplus < 201103L + #if !defined(__cplusplus) || (!defined(_LIBCPP_VERSION) && __cplusplus < 201103L) typedef uint_least16_t char16_t; typedef uint_least32_t char32_t; #endif diff --git a/pypy/module/thread/test/test_import_lock.py b/pypy/module/thread/test/test_import_lock.py --- a/pypy/module/thread/test/test_import_lock.py +++ b/pypy/module/thread/test/test_import_lock.py @@ -101,8 +101,8 @@ importhook(space, 'sys') assert importlock.count == 0 # A new module - importhook(space, "time") - assert importlock.count == 1 + importhook(space, 're') + assert importlock.count >= 9 # Import it again previous_count = importlock.count importhook(space, "time") diff --git a/rpython/annotator/annrpython.py b/rpython/annotator/annrpython.py --- a/rpython/annotator/annrpython.py +++ b/rpython/annotator/annrpython.py @@ -15,34 +15,10 @@ typeof, s_ImpossibleValue, SomeInstance, intersection, difference) from rpython.annotator.bookkeeper import Bookkeeper from rpython.rtyper.normalizecalls import perform_normalizations -from collections import deque log = AnsiLogger("annrpython") -class ShuffleDict(object): - def __init__(self): - self._d = {} - self.keys = deque() - - def __setitem__(self, k, v): - if k in self._d: - self._d[k] = v - else: - self._d[k] = v - self.keys.append(k) - - def __getitem__(self, k): - return self._d[k] - - def popitem(self): - key = self.keys.popleft() - item = self._d.pop(key) - return (key, item) - - def __nonzero__(self): 
- return bool(self._d) - class RPythonAnnotator(object): """Block annotator for RPython. See description in doc/translation.txt.""" @@ -57,7 +33,7 @@ translator = TranslationContext() translator.annotator = self self.translator = translator - self.pendingblocks = ShuffleDict() # map {block: graph-containing-it} + self.genpendingblocks=[{}] # [{block: graph-containing-it}] * generation self.annotated = {} # set of blocks already seen self.added_blocks = None # see processblock() below self.links_followed = {} # set of links that have ever been followed @@ -81,7 +57,7 @@ self.errors = [] def __getstate__(self): - attrs = """translator pendingblocks annotated links_followed + attrs = """translator genpendingblocks annotated links_followed notify bookkeeper frozen policy added_blocks""".split() ret = self.__dict__.copy() for key, value in ret.items(): @@ -212,19 +188,47 @@ else: self.mergeinputargs(graph, block, cells) if not self.annotated[block]: - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) + + def schedulependingblock(self, graph, block): + # 'self.genpendingblocks' is a list of dictionaries which is + # logically equivalent to just one dictionary. But we keep a + # 'generation' number on each block (=key), and whenever we + # process a block, we increase its generation number. The + # block is added to the 'genpendingblocks' indexed by its + # generation number. See complete_pending_blocks() below. + generation = getattr(block, 'generation', 0) + self.genpendingblocks[generation][block] = graph def complete_pending_blocks(self): - while self.pendingblocks: - block, graph = self.pendingblocks.popitem() - self.processblock(graph, block) + while True: + # Find the first of the dictionaries in 'self.genpendingblocks' + # which is not empty + gen = 0 + for pendingblocks in self.genpendingblocks: + if pendingblocks: + break + gen += 1 + else: + return # all empty => done + + gen += 1 # next generation number + if len(self.genpendingblocks) == gen: + self.genpendingblocks.append({}) + + # Process all blocks at this level + # (if any gets re-inserted, it will be into the next level) + while pendingblocks: + block, graph = pendingblocks.popitem() + block.generation = gen + self.processblock(graph, block) def complete(self): """Process pending blocks until none is left.""" while True: self.complete_pending_blocks() self.policy.no_more_blocks_to_annotate(self) - if not self.pendingblocks: + if not any(self.genpendingblocks): break # finished # make sure that the return variables of all graphs is annotated if self.added_blocks is not None: @@ -309,21 +313,15 @@ #___ interface for annotator.bookkeeper _______ def recursivecall(self, graph, whence, inputcells): - if isinstance(whence, tuple): + if whence is not None: parent_graph, parent_block, parent_index = whence tag = parent_block, parent_index self.translator.update_call_graph(parent_graph, graph, tag) - # self.notify[graph.returnblock] is a dictionary of call - # points to this func which triggers a reflow whenever the - # return block of this graph has been analysed. - callpositions = self.notify.setdefault(graph.returnblock, {}) - if whence is not None: - if callable(whence): - def callback(): - whence(self, graph) - else: - callback = whence - callpositions[callback] = True + # self.notify[graph.returnblock] is a set of call + # points to this func which triggers a reflow whenever the + # return block of this graph has been analysed. 
+ returnpositions = self.notify.setdefault(graph.returnblock, set()) + returnpositions.add(whence) # generalize the function's input arguments self.addpendingblock(graph, graph.startblock, inputcells) @@ -416,7 +414,7 @@ def reflowpendingblock(self, graph, block): assert not self.frozen assert graph not in self.fixed_graphs - self.pendingblocks[block] = graph + self.schedulependingblock(graph, block) assert block in self.annotated self.annotated[block] = False # must re-flow self.blocked_blocks[block] = (graph, None) @@ -574,12 +572,8 @@ self.follow_link(graph, link, constraints) if block in self.notify: - # reflow from certain positions when this block is done - for callback in self.notify[block]: - if isinstance(callback, tuple): - self.reflowfromposition(callback) # callback is a position - else: - callback() + for position in self.notify[block]: + self.reflowfromposition(position) def follow_link(self, graph, link, constraints): diff --git a/rpython/annotator/bookkeeper.py b/rpython/annotator/bookkeeper.py --- a/rpython/annotator/bookkeeper.py +++ b/rpython/annotator/bookkeeper.py @@ -547,10 +547,8 @@ (position_key, "first") and (position_key, "second"). In general, "unique_key" should somehow uniquely identify where - the call is in the source code, and "callback" can be either a - position_key to reflow from when we see more general results, - or a real callback function that will be called with arguments - # "(annotator, called_graph)" whenever the result is generalized. + the call is in the source code, and "callback" is a + position_key to reflow from when we see more general results. "replace" can be set to a list of old unique_key values to forget now, because the given "unique_key" replaces them. diff --git a/rpython/annotator/dictdef.py b/rpython/annotator/dictdef.py --- a/rpython/annotator/dictdef.py +++ b/rpython/annotator/dictdef.py @@ -1,5 +1,5 @@ from rpython.annotator.model import ( - s_ImpossibleValue, SomeInteger, s_Bool, union) + s_ImpossibleValue, SomeInteger, s_Bool, union, AnnotatorError) from rpython.annotator.listdef import ListItem from rpython.rlib.objectmodel import compute_hash @@ -51,23 +51,19 @@ s_key = self.s_value - def check_eqfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert s_Bool.contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myeq, self.s_rdict_eqfn, [s_key, s_key], replace=replace_othereq) + if not s_Bool.contains(s): + raise AnnotatorError( "the custom eq function of an r_dict must return a boolean" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myeq, self.s_rdict_eqfn, [s_key, s_key], - replace=replace_othereq, - callback = check_eqfn) - def check_hashfn(annotator, graph): - s = annotator.binding(graph.getreturnvar()) - assert SomeInteger().contains(s), ( + s = self.bookkeeper.emulate_pbc_call( + myhash, self.s_rdict_hashfn, [s_key], replace=replace_otherhash) + if not SomeInteger().contains(s): + raise AnnotatorError( "the custom hash function of an r_dict must return an integer" " (got %r)" % (s,)) - self.bookkeeper.emulate_pbc_call(myhash, self.s_rdict_hashfn, [s_key], - replace=replace_otherhash, - callback = check_hashfn) class DictValue(ListItem): @@ -93,11 +89,11 @@ self.force_non_null = force_non_null def read_key(self, position_key): - self.dictkey.read_locations[position_key] = True + self.dictkey.read_locations.add(position_key) return self.dictkey.s_value def read_value(self, position_key): - self.dictvalue.read_locations[position_key] = True + self.dictvalue.read_locations.add(position_key) 
return self.dictvalue.s_value def same_as(self, other): diff --git a/rpython/annotator/listdef.py b/rpython/annotator/listdef.py --- a/rpython/annotator/listdef.py +++ b/rpython/annotator/listdef.py @@ -30,7 +30,7 @@ self.s_value = s_value self.bookkeeper = bookkeeper self.itemof = {} # set of all ListDefs using this ListItem - self.read_locations = {} + self.read_locations = set() if bookkeeper is None: self.dont_change_any_more = True @@ -95,7 +95,7 @@ self.notify_update() if s_new_value != s_other_value: other.notify_update() - self.read_locations.update(other.read_locations) + self.read_locations |= other.read_locations def patch(self): for listdef in self.itemof: @@ -130,7 +130,7 @@ self.listitem.itemof[self] = True def read_item(self, position_key): - self.listitem.read_locations[position_key] = True + self.listitem.read_locations.add(position_key) return self.listitem.s_value def same_as(self, other): diff --git a/rpython/annotator/test/test_annrpython.py b/rpython/annotator/test/test_annrpython.py --- a/rpython/annotator/test/test_annrpython.py +++ b/rpython/annotator/test/test_annrpython.py @@ -2141,28 +2141,6 @@ assert (fdesc.get_s_signatures((2, (), False)) == [([someint,someint],someint)]) - def test_emulated_pbc_call_callback(self): - def f(a,b): - return a + b - from rpython.annotator import annrpython - a = annrpython.RPythonAnnotator() - from rpython.annotator import model as annmodel - - memo = [] - def callb(ann, graph): - memo.append(annmodel.SomeInteger() == ann.binding(graph.getreturnvar())) - - s_f = a.bookkeeper.immutablevalue(f) - s = a.bookkeeper.emulate_pbc_call('f', s_f, [annmodel.SomeInteger(), annmodel.SomeInteger()], - callback=callb) - assert s == annmodel.SomeImpossibleValue() - a.complete() - - assert a.binding(graphof(a, f).getreturnvar()).knowntype == int - assert len(memo) >= 1 - for t in memo: - assert t - def test_iterator_union(self): def it(d): return d.iteritems() diff --git a/rpython/config/support.py b/rpython/config/support.py --- a/rpython/config/support.py +++ b/rpython/config/support.py @@ -41,8 +41,8 @@ Function to determine if your system comes with PAX protection. 
""" if sys.platform.startswith('linux'): - # we need a running process PID and 1 is always running - with open("/proc/1/status") as fd: + # use PID of current process for the check + with open("/proc/self/status") as fd: data = fd.read() if 'PaX' in data: return True diff --git a/rpython/flowspace/model.py b/rpython/flowspace/model.py --- a/rpython/flowspace/model.py +++ b/rpython/flowspace/model.py @@ -170,7 +170,7 @@ class Block(object): __slots__ = """inputargs operations exitswitch - exits blockcolor""".split() + exits blockcolor generation""".split() def __init__(self, inputargs): self.inputargs = list(inputargs) # mixed list of variable/const XXX diff --git a/rpython/rlib/rerased.py b/rpython/rlib/rerased.py --- a/rpython/rlib/rerased.py +++ b/rpython/rlib/rerased.py @@ -15,6 +15,8 @@ """ import sys +from collections import defaultdict + from rpython.annotator import model as annmodel from rpython.rtyper.extregistry import ExtRegistryEntry from rpython.rtyper.llannotation import lltype_to_annotation @@ -48,34 +50,29 @@ def __deepcopy__(self, memo): return self - def _getdict(self, bk): - try: - dict = bk._erasing_pairs_tunnel - except AttributeError: - dict = bk._erasing_pairs_tunnel = {} - return dict +class IdentityDesc(object): + def __init__(self, bookkeeper): + self.bookkeeper = bookkeeper + self.s_input = annmodel.s_ImpossibleValue + self.reflowpositions = {} - def enter_tunnel(self, bookkeeper, s_obj): - dict = self._getdict(bookkeeper) - s_previousobj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - s_obj = annmodel.unionof(s_previousobj, s_obj) - if s_obj != s_previousobj: - dict[self] = (s_obj, reflowpositions) - for position in reflowpositions: - bookkeeper.annotator.reflowfromposition(position) + def enter_tunnel(self, s_obj): + s_obj = annmodel.unionof(self.s_input, s_obj) + if s_obj != self.s_input: + self.s_input = s_obj + for position in self.reflowpositions: + self.bookkeeper.annotator.reflowfromposition(position) - def leave_tunnel(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, reflowpositions = dict.setdefault( - self, (annmodel.s_ImpossibleValue, {})) - reflowpositions[bookkeeper.position_key] = True - return s_obj + def leave_tunnel(self): + self.reflowpositions[self.bookkeeper.position_key] = True + return self.s_input - def get_input_annotation(self, bookkeeper): - dict = self._getdict(bookkeeper) - s_obj, _ = dict[self] - return s_obj +def _get_desc(bk, identity): + try: + descs = bk._erasing_pairs_descs + except AttributeError: + descs = bk._erasing_pairs_descs = defaultdict(lambda: IdentityDesc(bk)) + return descs[identity] _identity_for_ints = ErasingPairIdentity("int") @@ -94,21 +91,23 @@ _about_ = erase def compute_result_annotation(self, s_obj): - identity.enter_tunnel(self.bookkeeper, s_obj) + desc = _get_desc(self.bookkeeper, identity) + desc.enter_tunnel(s_obj) return _some_erased() def specialize_call(self, hop): bk = hop.rtyper.annotator.bookkeeper - s_obj = identity.get_input_annotation(bk) + desc = _get_desc(bk, identity) hop.exception_cannot_occur() - return _rtype_erase(hop, s_obj) + return _rtype_erase(hop, desc.s_input) class Entry(ExtRegistryEntry): _about_ = unerase def compute_result_annotation(self, s_obj): assert _some_erased().contains(s_obj) - return identity.leave_tunnel(self.bookkeeper) + desc = _get_desc(self.bookkeeper, identity) + return desc.leave_tunnel() def specialize_call(self, hop): hop.exception_cannot_occur() @@ -130,6 +129,7 @@ def __init__(self, x, identity): self._x = x 
self._identity = identity + def __repr__(self): return "Erased(%r, %r)" % (self._x, self._identity) @@ -140,7 +140,7 @@ assert config.translation.taggedpointers, "need to enable tagged pointers to use erase_int" return lltype.cast_int_to_ptr(r_self.lowleveltype, value._x * 2 + 1) bk = r_self.rtyper.annotator.bookkeeper - s_obj = value._identity.get_input_annotation(bk) + s_obj = _get_desc(bk, value._identity).s_input r_obj = r_self.rtyper.getrepr(s_obj) if r_obj.lowleveltype is lltype.Void: return lltype.nullptr(r_self.lowleveltype.TO) @@ -182,9 +182,9 @@ _type_ = Erased def compute_annotation(self): - identity = self.instance._identity + desc = _get_desc(self.bookkeeper, self.instance._identity) s_obj = self.bookkeeper.immutablevalue(self.instance._x) - identity.enter_tunnel(self.bookkeeper, s_obj) + desc.enter_tunnel(s_obj) return _some_erased() # annotation and rtyping support diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py --- a/rpython/rlib/rposix.py +++ b/rpython/rlib/rposix.py @@ -1881,7 +1881,8 @@ c_chroot = external('chroot', [rffi.CCHARP], rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO, - macro=_MACRO_ON_POSIX) + macro=_MACRO_ON_POSIX, + compilation_info=ExternalCompilationInfo(includes=['unistd.h'])) @replace_os_function('chroot') def chroot(path): diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,6 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory +from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -40,11 +41,13 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -55,9 +55,9 @@ return None -def stop_sampling(space): +def stop_sampling(): fd = _get_vmprof().cintf.vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) -def start_sampling(space): +def start_sampling(): _get_vmprof().cintf.vmprof_start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -62,7 +62,6 @@ SHARED.join('compat.c'), SHARED.join('machine.c'), SHARED.join('vmp_stack.c'), - SHARED.join('vmprof_mt.c'), SHARED.join('vmprof_memory.c'), SHARED.join('vmprof_common.c'), # symbol table already in separate_module_files @@ -70,6 +69,10 @@ post_include_bits=[], compile_extra=compile_extra ) +if sys.platform != 'win32': + eci_kwds['separate_module_files'].append( + SHARED.join('vmprof_mt.c'), + ) global_eci = ExternalCompilationInfo(**eci_kwds) def configure_libbacktrace_linux(): diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -164,23 +164,25 @@ @rvmprof.vmprof_execute_code("xcode1", lambda self, code, count: code) def main(self, code, count): + code = self.MyCode('py:main:3:main') + rvmprof.register_code(code, self.MyCode.get_name) + code = self.MyCode('py:code:7:native_func') + rvmprof.register_code(code, self.MyCode.get_name) if count > 0: return 
self.main(code, count-1) else: return self.native_func(100) def test(self): - # XXX: this test is known to fail since rev a4f077ba651c, but buildbot - # never ran it. FIXME. from vmprof import read_profile - from vmprof.show import PrettyPrinter + # from vmprof.show import PrettyPrinter assert self.rpy_entry_point(3, 0.5) == 42000 assert self.tmpfile.check() - # + prof = read_profile(self.tmpfilename) tree = prof.get_tree() - p = PrettyPrinter() - p._print_tree(tree) + # p = PrettyPrinter() + # p._print_tree(tree) def walk(tree, symbols): symbols.append(tree.name) if len(tree.children) == 0: @@ -189,7 +191,7 @@ walk(child, symbols) symbols = [] walk(tree, symbols) - not_found = ['n:native_func'] + not_found = ['py:code:7:native_func'] for sym in symbols: for i,name in enumerate(not_found): if sym.startswith(name): diff --git a/rpython/rlib/test/test_rarithmetic.py b/rpython/rlib/test/test_rarithmetic.py --- a/rpython/rlib/test/test_rarithmetic.py +++ b/rpython/rlib/test/test_rarithmetic.py @@ -2,7 +2,7 @@ from rpython.rtyper.test.test_llinterp import interpret from rpython.rlib.rarithmetic import * from rpython.rlib.rstring import ParseStringError, ParseStringOverflowError -from hypothesis import given, strategies +from hypothesis import given, strategies, assume import sys import py @@ -404,8 +404,11 @@ def test_int_c_div_mod(x, y): assert int_c_div(~x, y) == -(abs(~x) // y) assert int_c_div( x,-y) == -(x // y) - if (x, y) == (sys.maxint, 1): - py.test.skip("would overflow") + + at given(strategies.integers(min_value=0, max_value=sys.maxint), + strategies.integers(min_value=1, max_value=sys.maxint)) +def test_int_c_div_mod_2(x, y): + assume((x, y) != (sys.maxint, 1)) # This case would overflow assert int_c_div(~x,-y) == +(abs(~x) // y) for x1 in [x, ~x]: for y1 in [y, -y]: diff --git a/rpython/rlib/test/test_rstacklet.py b/rpython/rlib/test/test_rstacklet.py --- a/rpython/rlib/test/test_rstacklet.py +++ b/rpython/rlib/test/test_rstacklet.py @@ -10,6 +10,8 @@ from rpython.config.translationoption import DEFL_ROOTFINDER_WITHJIT from rpython.rlib import rrandom, rgc from rpython.rlib.rarithmetic import intmask +from rpython.rlib.nonconst import NonConstant +from rpython.rlib import rvmprof from rpython.rtyper.lltypesystem import lltype, llmemory, rffi from rpython.translator.c.test.test_standalone import StandaloneTests @@ -273,7 +275,23 @@ llmemory.raw_free(raw) +# +# bah, we need to make sure that vmprof_execute_code is annotated, else +# rvmprof.c does not compile correctly +class FakeVMProfCode(object): + pass +rvmprof.register_code_object_class(FakeVMProfCode, lambda code: 'name') + at rvmprof.vmprof_execute_code("xcode1", lambda code, num: code) +def fake_vmprof_main(code, num): + return 42 +# + def entry_point(argv): + # + if NonConstant(False): + fake_vmprof_main(FakeVMProfCode(), 42) + # + # seed = 0 if len(argv) > 1: seed = int(argv[1]) From pypy.commits at gmail.com Tue Nov 21 00:32:34 2017 From: pypy.commits at gmail.com (rlamy) Date: Mon, 20 Nov 2017 21:32:34 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Do not use PyUnicode_Check in PyUnicode_AS_UNICODE, ever Message-ID: <5a13ba72.47b0df0a.849f3.fbb9@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93108:ecfbd8f62994 Date: 2017-11-21 05:32 +0000 http://bitbucket.org/pypy/pypy/changeset/ecfbd8f62994/ Log: Do not use PyUnicode_Check in PyUnicode_AS_UNICODE, ever This macro may be used to fill in an uninitialised, unrealised unicode object, but PyUnicode_Check realises it, and modifying a PyPy-linked PyUnicodeObject is 
a no-no... diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h --- a/pypy/module/cpyext/include/unicodeobject.h +++ b/pypy/module/cpyext/include/unicodeobject.h @@ -61,8 +61,7 @@ use PyUnicode_WRITE() and PyUnicode_READ(). */ #define PyUnicode_AS_UNICODE(op) \ - (assert(PyUnicode_Check(op)), \ - (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ + ((((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ PyUnicode_AsUnicode((PyObject *)(op))) #define PyUnicode_AS_DATA(op) \ diff --git a/pypy/module/cpyext/test/_widechar.c b/pypy/module/cpyext/test/_widechar.c new file mode 100644 --- /dev/null +++ b/pypy/module/cpyext/test/_widechar.c @@ -0,0 +1,47 @@ +// Enable asserts. This used to fail in that case only. +#undef NDEBUG + +#include "Python.h" + +static PyObject * +test_widechar(PyObject *self) +{ + const wchar_t invalid[1] = {(wchar_t)0x110000u}; + PyObject *wide; + + wide = PyUnicode_FromUnicode(NULL, 1); + if (wide == NULL) + return NULL; + PyUnicode_AS_UNICODE(wide)[0] = invalid[0]; + if (_PyUnicode_Ready(wide) < 0) { + return NULL; + } + return wide; +} + +static PyMethodDef TestMethods[] = { + {"test_widechar", (PyCFunction)test_widechar, METH_NOARGS}, + {NULL, NULL} /* sentinel */ +}; + +static struct PyModuleDef _testcapimodule = { + PyModuleDef_HEAD_INIT, + "_widechar", + NULL, + -1, + TestMethods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__widechar(void) +{ + PyObject *m; + m = PyModule_Create(&_testcapimodule); + if (m == NULL) + return NULL; + return m; +} diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py --- a/pypy/module/cpyext/test/test_unicodeobject.py +++ b/pypy/module/cpyext/test/test_unicodeobject.py @@ -356,6 +356,10 @@ print(repr(wide), repr(utf8)) assert wide == utf8 + def test_invalid(self): + m = self.import_module('_widechar') + raises(ValueError, m.test_widechar) + class TestUnicode(BaseApiTest): def test_unicodeobject(self, space): From pypy.commits at gmail.com Tue Nov 21 04:30:34 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 01:30:34 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: interpreter fixes Message-ID: <5a13f23a.aea6df0a.cd033.ca65@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93109:86548802b11b Date: 2017-11-21 10:29 +0100 http://bitbucket.org/pypy/pypy/changeset/86548802b11b/ Log: interpreter fixes diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -231,11 +231,14 @@ return s[pt:ps] def decode_utf8_recode(space, s, ps, end, recode_encoding): - lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, end) - w_v = unicodehelper.encode(space, space.newutf8(s[ps:end], lgt, flag), + p = ps + while p < end and ord(s[p]) & 0x80: + p += 1 + lgt, flag = unicodehelper.check_utf8_or_raise(space, s, ps, p) + w_v = unicodehelper.encode(space, space.newutf8(s[ps:p], lgt, flag), recode_encoding) v = space.bytes_w(w_v) - return v, ps + return v, p def raise_app_valueerror(space, msg): raise OperationError(space.w_ValueError, space.newtext(msg)) diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py --- a/pypy/interpreter/pyparser/test/test_parsestring.py +++ b/pypy/interpreter/pyparser/test/test_parsestring.py @@ -10,7 +10,7 @@ assert space.str_w(w_ret) == value 
elif isinstance(value, unicode): assert space.type(w_ret) == space.w_unicode - assert space.unicode_w(w_ret) == value + assert space.utf8_w(w_ret).decode('utf8') == value else: assert False @@ -102,7 +102,4 @@ def test_decode_unicode_utf8(self): buf = parsestring.decode_unicode_utf8(self.space, 'u"\xf0\x9f\x92\x8b"', 2, 6) - if sys.maxunicode == 65535: - assert buf == r"\U0000d83d\U0000dc8b" - else: - assert buf == r"\U0001f48b" + assert buf == r"\U0001f48b" diff --git a/pypy/interpreter/test/test_objspace.py b/pypy/interpreter/test/test_objspace.py --- a/pypy/interpreter/test/test_objspace.py +++ b/pypy/interpreter/test/test_objspace.py @@ -216,9 +216,7 @@ space = self.space w = space.wrap assert space.text0_w(w("123")) == "123" - exc = space.raises_w(space.w_TypeError, space.text0_w, w("123\x004")) - assert space.unicode0_w(w(u"123")) == u"123" - exc = space.raises_w(space.w_TypeError, space.unicode0_w, w(u"123\x004")) + space.raises_w(space.w_TypeError, space.text0_w, w("123\x004")) def test_getindex_w(self): w_instance1 = self.space.appexec([], """(): diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -351,12 +351,12 @@ try: chr = r_uint(int(s[pos:pos+digits], 16)) except ValueError: - aaaa endinpos = pos while s[endinpos] in hexdigits: endinpos += 1 res, pos = errorhandler(errors, encoding, message, s, pos-2, endinpos) + size, flag = rutf8.check_utf8(res, True) builder.append(res) else: # when we get here, chr is a 32-bit unicode character @@ -1392,7 +1392,7 @@ while pos < size: ch = s[pos] - c = mapping.get(ch, ERROR_CHAR) + c = mapping.get(ord(ch), ERROR_CHAR) if c == ERROR_CHAR: r, pos = errorhandler(errors, "charmap", "character maps to ", @@ -1407,20 +1407,17 @@ def utf8_encode_charmap(s, errors, errorhandler=None, mapping=None): - YYY + size = len(s) if mapping is None: - return unicode_encode_latin_1(s, size, errors, - errorhandler=errorhandler) - - if errorhandler is None: - errorhandler = default_unicode_error_encode + return utf8_encode_latin_1(s, size, errors, + errorhandler=errorhandler) if size == 0: return '' result = StringBuilder(size) pos = 0 while pos < size: - ch = s[pos] + ch = rutf8.codepoint_at_pos(s, pos) c = mapping.get(ch, '') if len(c) == 0: @@ -1428,9 +1425,10 @@ collend = pos + 1 while collend < size and mapping.get(s[collend], '') == '': collend += 1 - ru, rs, pos = errorhandler(errors, "charmap", - "character maps to ", - s, pos, collend) + rs, pos = errorhandler(errors, "charmap", + "character maps to ", + s, pos, collend) + XXXX if rs is not None: # py3k only result.append(rs) @@ -1445,6 +1443,6 @@ result.append(c2) continue result.append(c) - pos += 1 + pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -551,10 +551,10 @@ # get the character from the mapping if self.mapping_w is not None: - w_ch = self.mapping_w[ord(ch)] + w_ch = self.mapping_w[ch] else: try: - w_ch = space.getitem(self.w_mapping, space.newint(ord(ch))) + w_ch = space.getitem(self.w_mapping, space.newint(ch)) except OperationError as e: if not e.match(space, space.w_LookupError): raise @@ -587,7 +587,7 @@ # get the character from the mapping try: - w_ch = space.getitem(self.w_mapping, space.newint(ord(ch))) + w_ch = space.getitem(self.w_mapping, space.newint(ch)) except OperationError as 
e: if not e.match(space, space.w_LookupError): raise @@ -633,8 +633,8 @@ return space.newtuple([space.newutf8(result, lgt, flag), space.newint(consumed)]) - at unwrap_spec(utf8='utf8', errors='text_or_none') -def charmap_encode(space, utf8, errors="strict", w_mapping=None): + at unwrap_spec(errors='text_or_none') +def charmap_encode(space, w_unicode, errors="strict", w_mapping=None): from pypy.interpreter import unicodehelper if errors is None: @@ -645,9 +645,10 @@ mapping = Charmap_Encode(space, w_mapping) state = space.fromcache(CodecState) - result = unicodehelper.unicode_encode_charmap( - utf8, errors, state.encode_error_handler, mapping) - return space.newtuple([space.newbytes(result), space.newint(len(uni))]) + w_uni = unicodehelper.convert_arg_to_w_unicode(space, w_unicode) + result = unicodehelper.utf8_encode_charmap( + space.utf8_w(w_uni), errors, state.encode_error_handler, mapping) + return space.newtuple([space.newbytes(result), space.newint(w_uni._len())]) @unwrap_spec(chars='utf8') From pypy.commits at gmail.com Tue Nov 21 08:03:46 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 05:03:46 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix all the tests in codecs until test_ztranslation Message-ID: <5a142432.0b0f1c0a.ac5e3.472b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93110:c7109cb7f6be Date: 2017-11-21 14:03 +0100 http://bitbucket.org/pypy/pypy/changeset/c7109cb7f6be/ Log: fix all the tests in codecs until test_ztranslation diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -173,8 +173,13 @@ r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) - res.append(r) for j in range(pos - cur): + c = rutf8.codepoint_at_pos(r, j) + if c > 0xFF: + errorhandler("strict", 'latin1', + 'ordinal not in range(256)', s, + cur, cur + 1) + res.append(chr(c)) i = rutf8.next_codepoint_pos(s, i) cur = pos cur += 1 @@ -200,7 +205,12 @@ msg = "ordinal not in range(128)" r, newpos = errorhandler(errors, 'ascii', msg, utf8, pos, endpos) - for _ in range(newpos - pos): + for j in range(newpos - pos): + c = rutf8.codepoint_at_pos(r, j) + if c > 0x7F: + errorhandler("strict", 'ascii', + 'ordinal not in range(128)', utf8, + pos, pos + 1) i = rutf8.next_codepoint_pos(utf8, i) pos = newpos res.append(r) @@ -364,7 +374,7 @@ message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) - size, flag = rutf8.check_utf8(res) + size, flag = rutf8.check_utf8(res, True) builder.append(res) else: rutf8.unichr_as_utf8_append(builder, chr, True) @@ -778,21 +788,25 @@ if base64bits > 0: # left-over bits if base64bits >= 6: # We've seen at least one base-64 character - aaa pos += 1 msg = "partial character in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) continue else: # Some bits remain; they should be zero if base64buffer != 0: - bbb pos += 1 msg = "non-zero padding bits in shift sequence" res, pos = errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) continue @@ -826,11 +840,13 @@ outsize += 1 pos += 1 else: - yyy startinpos = pos pos += 1 msg = "unexpected special character" res, pos = 
errorhandler(errors, 'utf7', msg, s, pos-1, pos) + reslen, resflags = rutf8.check_utf8(res, True) + outsize += reslen + flag = combine_flags(flag, resflags) result.append(res) # end of string @@ -973,7 +989,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, rutf8.FLAG_ASCII, bo if bo == -1: # force little endian ihi = 1 @@ -1182,7 +1198,7 @@ else: bo = 1 if size == 0: - return u'', 0, bo + return '', 0, 0, rutf8.FLAG_ASCII, bo if bo == -1: # force little endian iorder = [0, 1, 2, 3] @@ -1409,40 +1425,43 @@ mapping=None): size = len(s) if mapping is None: - return utf8_encode_latin_1(s, size, errors, - errorhandler=errorhandler) + return utf8_encode_latin_1(s, errors, errorhandler=errorhandler) if size == 0: return '' result = StringBuilder(size) pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) c = mapping.get(ch, '') if len(c) == 0: # collect all unencodable chars. Important for narrow builds. - collend = pos + 1 - while collend < size and mapping.get(s[collend], '') == '': - collend += 1 - rs, pos = errorhandler(errors, "charmap", + collend = rutf8.next_codepoint_pos(s, pos) + endindex = index + 1 + while collend < size and mapping.get(rutf8.codepoint_at_pos(s, collend), '') == '': + collend = rutf8.next_codepoint_pos(s, collend) + endindex += 1 + rs, endindex = errorhandler(errors, "charmap", "character maps to ", - s, pos, collend) - XXXX - if rs is not None: - # py3k only - result.append(rs) - continue - for ch2 in ru: - c2 = mapping.get(ch2, '') - if len(c2) == 0: + s, index, endindex) + j = 0 + for _ in range(endindex - index): + ch2 = rutf8.codepoint_at_pos(rs, j) + ch2 = mapping.get(ch2, '') + if not ch2: errorhandler( "strict", "charmap", "character maps to ", - s, pos, pos + 1) - result.append(c2) + s, index, index + 1) + result.append(ch2) + index += 1 + j = rutf8.next_codepoint_pos(rs, j) + pos = rutf8.next_codepoint_pos(s, pos) continue result.append(c) + index += 1 pos = rutf8.next_codepoint_pos(s, pos) return result.build() diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -1,6 +1,6 @@ from rpython.rlib import jit, rutf8 from rpython.rlib.objectmodel import we_are_translated, not_rpython -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import StringBuilder from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault @@ -241,33 +241,42 @@ "don't know how to handle %T in error callback", w_exc) def backslashreplace_errors(space, w_exc): + from pypy.interpreter import unicodehelper + check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + space.realutf8_w(w_obj) # for errors + w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) - builder = UnicodeBuilder() + start = w_obj._index_to_byte(start) + end = w_obj._index_to_byte(end) + builder = StringBuilder() + obj = w_obj._utf8 pos = start while pos < end: - oc = ord(obj[pos]) + oc = rutf8.codepoint_at_pos(obj, pos) num = hex(oc) if (oc >= 0x10000): - builder.append(u"\\U") + builder.append("\\U") zeros = 8 elif (oc >= 0x100): - builder.append(u"\\u") + 
builder.append("\\u") zeros = 4 else: - builder.append(u"\\x") + builder.append("\\x") zeros = 2 lnum = len(num) nb = zeros + 2 - lnum # num starts with '0x' if nb > 0: - builder.append_multiple_char(u'0', nb) - builder.append_slice(unicode(num), 2, lnum) - pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + builder.append_multiple_char('0', nb) + builder.append_slice(num, 2, lnum) + pos = rutf8.next_codepoint_pos(obj, pos) + r = builder.build() + lgt, flag = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt, flag), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) @@ -489,7 +498,7 @@ @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter.unicodehelper import str_decode_utf_16_helper if errors is None: errors = 'strict' @@ -504,16 +513,17 @@ consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = runicode.str_decode_utf_16_helper( - data, len(data), errors, final, - DecodeWrapper(state.decode_error_handler).handle, byteorder) - return space.newtuple([space.newunicode(res), space.newint(consumed), + res, consumed, lgt, flag, byteorder = str_decode_utf_16_helper( + data, errors, final, + state.decode_error_handler, byteorder) + return space.newtuple([space.newutf8(res, lgt, flag), + space.newint(consumed), space.newint(byteorder)]) @unwrap_spec(data='bufferstr', errors='text_or_none', byteorder=int, w_final=WrappedDefault(False)) def utf_32_ex_decode(space, data, errors='strict', byteorder=0, w_final=None): - from pypy.interpreter.unicodehelper import DecodeWrapper + from pypy.interpreter.unicodehelper import str_decode_utf_32_helper final = space.is_true(w_final) state = space.fromcache(CodecState) @@ -526,10 +536,11 @@ consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = runicode.str_decode_utf_32_helper( - data, len(data), errors, final, - DecodeWrapper(state.decode_error_handler).handle, byteorder) - return space.newtuple([space.newunicode(res), space.newint(consumed), + res, consumed, lgt, flag, byteorder = str_decode_utf_32_helper( + data, errors, final, + state.decode_error_handler, byteorder) + return space.newtuple([space.newutf8(res, lgt, flag), + space.newint(consumed), space.newint(byteorder)]) # ____________________________________________________________ diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -592,11 +592,11 @@ def handler_unicodeinternal(exc): if not isinstance(exc, UnicodeDecodeError): raise TypeError("don't know how to handle %r" % exc) - return (u"\x01", 1) + return (u"\x01", 4) codecs.register_error("test.hui", handler_unicodeinternal) res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui") if sys.maxunicode > 65535: - assert res == u"\u0000\u0001\u0000" # UCS4 build + assert res == u"\u0000\u0001" # UCS4 build else: assert res == u"\x00\x00\x01\x00\x00" # UCS2 build From pypy.commits at gmail.com Tue Nov 21 08:08:03 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 05:08:03 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: simple fixes in fake objspace Message-ID: <5a142533.43aadf0a.d1e02.1f1b@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: 
r93111:df28f6398687 Date: 2017-11-21 14:07 +0100 http://bitbucket.org/pypy/pypy/changeset/df28f6398687/ Log: simple fixes in fake objspace diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -2143,7 +2143,7 @@ 'float_w', 'uint_w', 'bigint_w', - 'unicode_w', + 'utf8_w', 'unwrap', 'is_true', 'is_w', diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -209,7 +209,7 @@ def newbytes(self, x): return w_some_obj() - def newutf8(self, x, l): + def newutf8(self, x, l, f): return w_some_obj() newtext = newbytes From pypy.commits at gmail.com Tue Nov 21 09:20:39 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 06:20:39 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: some improvements for xmlcharrefreplace Message-ID: <5a143637.88c5df0a.6bb8.26e3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93112:fd1b64ce9b80 Date: 2017-11-21 15:19 +0100 http://bitbucket.org/pypy/pypy/changeset/fd1b64ce9b80/ Log: some improvements for xmlcharrefreplace diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -164,26 +164,31 @@ while i < size: if ord(s[i]) <= 0x7F: res.append(s[i]) + i += 1 + cur += 1 else: oc = rutf8.codepoint_at_pos(s, i) if oc <= 0xFF: res.append(chr(oc)) - i += 1 + cur += 1 + i = rutf8.next_codepoint_pos(s, i) else: r, pos = errorhandler(errors, 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) for j in range(pos - cur): + i = rutf8.next_codepoint_pos(s, i) + + j = 0 + while j < len(r): c = rutf8.codepoint_at_pos(r, j) if c > 0xFF: errorhandler("strict", 'latin1', 'ordinal not in range(256)', s, cur, cur + 1) + j = rutf8.next_codepoint_pos(r, j) res.append(chr(c)) - i = rutf8.next_codepoint_pos(s, i) cur = pos - cur += 1 - i += 1 r = res.build() return r diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -215,27 +215,30 @@ "don't know how to handle %T in error callback", w_exc) def xmlcharrefreplace_errors(space, w_exc): + from pypy.interpreter import unicodehelper + check_exception(space, w_exc) if space.isinstance_w(w_exc, space.w_UnicodeEncodeError): - obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object'))) + w_obj = space.getattr(w_exc, space.newtext('object')) + space.realutf8_w(w_obj) # weeoes + w_obj = unicodehelper.convert_arg_to_w_unicode(space, w_obj) start = space.int_w(space.getattr(w_exc, space.newtext('start'))) w_end = space.getattr(w_exc, space.newtext('end')) end = space.int_w(w_end) - builder = UnicodeBuilder() + start = w_obj._index_to_byte(start) + end = w_obj._index_to_byte(end) + builder = StringBuilder() pos = start + obj = w_obj._utf8 while pos < end: - code = ord(obj[pos]) - if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF and - pos + 1 < end and 0xDC00 <= ord(obj[pos+1]) <= 0xDFFF): - code = (code & 0x03FF) << 10 - code |= ord(obj[pos+1]) & 0x03FF - code += 0x10000 - pos += 1 - builder.append(u"&#") - builder.append(unicode(str(code))) - builder.append(u";") - pos += 1 - return space.newtuple([space.newunicode(builder.build()), w_end]) + code = rutf8.codepoint_at_pos(obj, pos) + builder.append("&#") + builder.append(str(code)) + builder.append(";") + pos = 
rutf8.next_codepoint_pos(obj, pos) + r = builder.build() + lgt, flag = rutf8.check_utf8(r, True) + return space.newtuple([space.newutf8(r, lgt, flag), w_end]) else: raise oefmt(space.w_TypeError, "don't know how to handle %T in error callback", w_exc) diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py --- a/pypy/module/_codecs/test/test_codecs.py +++ b/pypy/module/_codecs/test/test_codecs.py @@ -750,3 +750,9 @@ assert _codecs.unicode_escape_decode(b) == (u'', 0) assert _codecs.raw_unicode_escape_decode(b) == (u'', 0) assert _codecs.unicode_internal_decode(b) == (u'', 0) + + def test_xmlcharrefreplace(self): + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace') + assert r == 'ሴ\x80⍅y\xab' + r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace') + assert r == 'ሴ€⍅y«' From pypy.commits at gmail.com Tue Nov 21 09:52:41 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 06:52:41 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: some rpython fixes Message-ID: <5a143db9.968ddf0a.433a7.990f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93113:5ffbd0a736d9 Date: 2017-11-21 15:51 +0100 http://bitbucket.org/pypy/pypy/changeset/5ffbd0a736d9/ Log: some rpython fixes diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -211,12 +211,16 @@ r, newpos = errorhandler(errors, 'ascii', msg, utf8, pos, endpos) for j in range(newpos - pos): + i = rutf8.next_codepoint_pos(utf8, i) + + j = 0 + while j < len(r): c = rutf8.codepoint_at_pos(r, j) if c > 0x7F: errorhandler("strict", 'ascii', 'ordinal not in range(128)', utf8, - pos, pos + 1) - i = rutf8.next_codepoint_pos(utf8, i) + pos, pos + 1) + j = rutf8.next_codepoint_pos(r, j) pos = newpos res.append(r) else: @@ -382,8 +386,8 @@ size, flag = rutf8.check_utf8(res, True) builder.append(res) else: - rutf8.unichr_as_utf8_append(builder, chr, True) - flag = rutf8.get_flag_from_code(chr) + rutf8.unichr_as_utf8_append(builder, intmask(chr), True) + flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 @@ -755,27 +759,31 @@ if inShift: # in a base-64 section if _utf7_IS_BASE64(ord(ch)): #consume a base-64 character base64buffer = (base64buffer << 6) | _utf7_FROM_BASE64(ch) + assert base64buffer >= 0 base64bits += 6 pos += 1 if base64bits >= 16: # enough bits for a UTF-16 value outCh = base64buffer >> (base64bits - 16) + assert outCh >= 0 base64bits -= 16 base64buffer &= (1 << base64bits) - 1 # clear high bits assert outCh <= 0xffff if surrogate: # expecting a second surrogate if outCh >= 0xDC00 and outCh <= 0xDFFF: - xxxx - result.append( - UNICHR((((surrogate & 0x3FF)<<10) | - (outCh & 0x3FF)) + 0x10000)) + code = (((surrogate & 0x3FF)<<10) | + (outCh & 0x3FF)) + 0x10000 + rutf8.unichr_as_utf8_append(result, code) + outsize += 1 + flag = combine_flags(flag, rutf8.FLAG_REGULAR) surrogate = 0 continue else: - YYYY - result.append(unichr(surrogate)) + rutf8.unichr_as_utf8_append(result, surrogate) + flag = rutf8.FLAG_HAS_SURROGATES + outsize += 1 surrogate = 0 # Not done with outCh: falls back to next line if outCh >= 0xD800 and outCh <= 0xDBFF: @@ -784,6 +792,7 @@ else: flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) outsize += 1 + assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) else: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py 
@@ -19,7 +19,7 @@ from rpython.rlib.objectmodel import enforceargs, we_are_translated from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint, intmask +from rpython.rlib.rarithmetic import r_uint from rpython.rlib.unicodedata import unicodedb from rpython.rtyper.lltypesystem import lltype, rffi @@ -27,6 +27,7 @@ def unichr_as_utf8(code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string """ + assert code >= 0 code = r_uint(code) if code <= r_uint(0x7F): # Encode ASCII From pypy.commits at gmail.com Tue Nov 21 11:19:51 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 08:19:51 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: general progress Message-ID: <5a145227.48d31c0a.f36c0.9cbc@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93114:cefc9ed0b4c5 Date: 2017-11-21 17:19 +0100 http://bitbucket.org/pypy/pypy/changeset/cefc9ed0b4c5/ Log: general progress diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py --- a/pypy/interpreter/pyparser/parsestring.py +++ b/pypy/interpreter/pyparser/parsestring.py @@ -74,8 +74,8 @@ substr = s[ps : q] if rawmode or '\\' not in s[ps:]: if need_encoding: - utf, (lgt, flag) = unicodehelper.decode_utf8(space, substr) - w_u = space.newutf8(utf, lgt, flag) + lgt, flag = unicodehelper.check_utf8_or_raise(space, substr) + w_u = space.newutf8(substr, lgt, flag) w_v = unicodehelper.encode(space, w_u, encoding) return w_v else: diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -1094,9 +1094,9 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) - pos = rutf8.next_codepoint_pos(s, pos) if ch < 0xD800: _STORECHAR(result, ch, byteorder) @@ -1106,27 +1106,27 @@ elif ch >= 0xE000 or allow_surrogates: _STORECHAR(result, ch, byteorder) else: - ru, pos = errorhandler(errors, public_encoding_name, + ru, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - xxx - #if rs is not None: - # # py3k only - # if len(rs) % 2 != 0: - # errorhandler('strict', public_encoding_name, - # 'surrogates not allowed', - # s, pos-1, pos) - # result.append(rs) - # continue - for ch in ru: + for j in range(newindex - index): + pos = rutf8.next_codepoint_pos(s, pos) + j = 0 + while j < len(ru): + ch = rutf8.codepoint_at_pos(ru, j) if ord(ch) < 0xD800: _STORECHAR(result, ord(ch), byteorder) else: errorhandler('strict', public_encoding_name, 'surrogates not allowed', s, pos-1, pos) + j = rutf8.next_codepoint_pos(ru, j) + index = newindex continue + pos = rutf8.next_codepoint_pos(s, pos) + index += 1 + return result.build() def utf8_encode_utf_16(s, errors, @@ -1285,32 +1285,30 @@ byteorder = BYTEORDER pos = 0 + index = 0 while pos < size: ch = rutf8.codepoint_at_pos(s, pos) pos = rutf8.next_codepoint_pos(s, pos) - ch2 = 0 if not allow_surrogates and 0xD800 <= ch < 0xE000: - ru, pos = errorhandler(errors, public_encoding_name, + ru, newindex = errorhandler(errors, public_encoding_name, 'surrogates not allowed', s, pos-1, pos) - XXX - if rs is not None: - # py3k only - if len(rs) % 4 != 0: - errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) - result.append(rs) - continue - for ch in ru: + for j in range(newindex - index): + pos = rutf8.next_codepoint_pos(s, pos) + j = 0 + while j < len(ru): + 
ch = rutf8.codepoint_at_pos(ru, j) if ord(ch) < 0xD800: _STORECHAR32(result, ord(ch), byteorder) else: errorhandler('strict', public_encoding_name, - 'surrogates not allowed', - s, pos-1, pos) + 'surrogates not allowed', + s, pos-1, pos) + j = rutf8.next_codepoint_pos(ru, j) + index = newindex continue _STORECHAR32(result, ch, byteorder) + index += 1 return result.build() diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -2,8 +2,9 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.translator.tool.cbuild import ExternalCompilationInfo from rpython.translator import cdir +from rpython.rlib import rutf8 -UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD' +UNICODE_REPLACEMENT_CHARACTER = u'\uFFFD'.encode("utf8") class EncodeDecodeError(Exception): @@ -126,7 +127,7 @@ errorcb, namecb, stringdata) src = pypy_cjk_dec_outbuf(decodebuf) length = pypy_cjk_dec_outlen(decodebuf) - return rffi.wcharpsize2unicode(src, length) + return rffi.wcharpsize2utf8(src, length) def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata): @@ -148,7 +149,7 @@ if errors == "strict": raise EncodeDecodeError(start, end, reason) elif errors == "ignore": - replace = u"" + replace = "" elif errors == "replace": replace = UNICODE_REPLACEMENT_CHARACTER else: @@ -156,8 +157,12 @@ replace, end = errorcb(errors, namecb, reason, stringdata, start, end) # 'replace' is RPython unicode here - with rffi.scoped_nonmoving_unicodebuffer(replace) as inbuf: - r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end) + lgt, _ = rutf8.check_utf8(replace, True) + inbuf = rffi.utf82wcharp(replace, lgt) + try: + r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, lgt, end) + finally: + lltype.free(inbuf, flavor='raw') if r == MBERR_NOMEMORY: raise MemoryError @@ -256,6 +261,7 @@ replace = "?" 
else: assert errorcb + XXX retu, rets, end = errorcb(errors, namecb, reason, unicodedata.encode("utf8"), start, end) if rets is not None: diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -1,3 +1,6 @@ + +from rpython.rlib import rutf8 + from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.gateway import interp2app, unwrap_spec from pypy.interpreter.typedef import TypeDef @@ -18,13 +21,14 @@ state = space.fromcache(CodecState) # try: - u_output = c_codecs.decode(self.codec, input, errors, + utf8_output = c_codecs.decode(self.codec, input, errors, state.decode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: raise wrap_unicodedecodeerror(space, e, input, self.name) except RuntimeError: raise wrap_runtimeerror(space) - return space.newtuple([space.newunicode(u_output), + lgt, flag = rutf8.check_utf8(utf8_output, True) + return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) @unwrap_spec(input='utf8', errors="text_or_none") @@ -74,7 +78,7 @@ space.newtext(e.reason)])) def wrap_unicodeencodeerror(space, e, input, inputlen, name): - flag = 13 + _, flag = rutf8.check_utf8(input, True) raise OperationError( space.w_UnicodeEncodeError, space.newtuple([ diff --git a/pypy/objspace/std/newformat.py b/pypy/objspace/std/newformat.py --- a/pypy/objspace/std/newformat.py +++ b/pypy/objspace/std/newformat.py @@ -841,8 +841,7 @@ prefix = "0x" as_str = value.format(LONG_DIGITS[:base], prefix) if self.is_unicode: - XXX - return as_str.decode("latin-1") + return rutf8.decode_latin_1(as_str) return as_str def _int_to_base(self, base, value): diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1009,6 +1009,29 @@ wcharp2unicoden, wcharpsize2unicode, unicode2wchararray, unicode2rawmem, ) = make_string_mappings(unicode) +def wcharpsize2utf8(w, size): + """ Helper to convert WCHARP pointer to utf8 in one go. 
+ Equivalent to wcharpsize2unicode().encode("utf8") + """ + from rpython.rlib import rutf8 + + s = StringBuilder(size) + for i in range(size): + rutf8.unichr_as_utf8_append(s, ord(w[i])) + return s.build() + +def utf82wcharp(utf8, utf8len): + from rpython.rlib import rutf8 + + w = lltype.malloc(CWCHARP.TO, utf8len, flavor='raw') + i = 0 + index = 0 + while i < len(utf8): + w[index] = unichr(rutf8.codepoint_at_pos(utf8, i)) + i = rutf8.next_codepoint_pos(utf8, i) + index += 1 + return w + # char** CCHARPP = lltype.Ptr(lltype.Array(CCHARP, hints={'nolength': True})) diff --git a/rpython/rtyper/lltypesystem/test/test_rffi.py b/rpython/rtyper/lltypesystem/test/test_rffi.py --- a/rpython/rtyper/lltypesystem/test/test_rffi.py +++ b/rpython/rtyper/lltypesystem/test/test_rffi.py @@ -590,6 +590,14 @@ res = fn(expected_extra_mallocs=range(30)) assert res == 32 * len(d) + def test_wcharp_to_utf8(self): + wchar = lltype.malloc(CWCHARP.TO, 3, flavor='raw') + wchar[0] = u'\u1234' + wchar[1] = u'\x80' + wchar[2] = u'a' + assert wcharpsize2utf8(wchar, 3).decode("utf8") == u'\u1234\x80a' + lltype.free(wchar, flavor='raw') + class TestRffiInternals: def test_struct_create(self): X = CStruct('xx', ('one', INT)) From pypy.commits at gmail.com Tue Nov 21 12:02:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 09:02:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Advance self.pos also when reading only from self.buffer Message-ID: <5a145c1b.92831c0a.46c3b.722f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93115:36daba4180a3 Date: 2017-11-21 17:02 +0000 http://bitbucket.org/pypy/pypy/changeset/36daba4180a3/ Log: Advance self.pos also when reading only from self.buffer diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -890,6 +890,7 @@ have = self._readahead() if have >= length: rwbuffer.setslice(0, self.buffer[self.pos:self.pos + length]) + self.pos += length return space.newint(length) written = 0 if have > 0: diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -214,6 +214,15 @@ assert n == 1 assert buf[:n] == b'c' + bufio = _io.BufferedReader(MockIO(), buffer_size=20) + buf = bytearray(2) + bufio.peek(3) + assert bufio.readinto1(buf) == 2 + assert buf == b'ab' + n = bufio.readinto1(buf) + assert n == 1 + assert buf[:n] == b'c' + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Tue Nov 21 12:26:17 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 09:26:17 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix checking for unichr range Message-ID: <5a1461b9.11c6df0a.f8192.b810@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93116:0021cc161b99 Date: 2017-11-21 18:25 +0100 http://bitbucket.org/pypy/pypy/changeset/0021cc161b99/ Log: fix checking for unichr range diff --git a/pypy/module/__builtin__/operation.py b/pypy/module/__builtin__/operation.py --- a/pypy/module/__builtin__/operation.py +++ b/pypy/module/__builtin__/operation.py @@ -24,16 +24,15 @@ @unwrap_spec(code=int) def unichr(space, code): "Return a Unicode string of one character with the given ordinal." 
- try: - s = rutf8.unichr_as_utf8(code, allow_surrogates=True) - except ValueError: - raise oefmt(space.w_ValueError, "unichr() arg out of range") - if code < 0x80: + if code < 0 or code > 0x10FFFF: + raise oefmt(space.w_ValueError, "unichr() arg out of range") + elif code < 0x80: flag = rutf8.FLAG_ASCII elif 0xD800 <= code <= 0xDFFF: flag = rutf8.FLAG_HAS_SURROGATES else: flag = rutf8.FLAG_REGULAR + s = rutf8.unichr_as_utf8(code, allow_surrogates=True) return space.newutf8(s, 1, flag) def len(space, w_obj): From pypy.commits at gmail.com Tue Nov 21 13:38:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 10:38:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Fix traceback.print_exception() when exc.offset == 0 Message-ID: <5a14729b.c78c1c0a.ebbca.df2d@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93117:e37a09d8450a Date: 2017-11-21 18:38 +0000 http://bitbucket.org/pypy/pypy/changeset/e37a09d8450a/ Log: Fix traceback.print_exception() when exc.offset == 0 diff --git a/lib-python/3/traceback.py b/lib-python/3/traceback.py --- a/lib-python/3/traceback.py +++ b/lib-python/3/traceback.py @@ -544,8 +544,8 @@ yield ' {}\n'.format(badline.strip()) if offset is not None: caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 - caretspace = caretspace[:offset].lstrip() + # bug in CPython: the case offset==0 is mishandled + caretspace = caretspace[:offset].lstrip()[:-1] # non-space whitespace (likes tabs) must be kept for alignment caretspace = ((c.isspace() and c or ' ') for c in caretspace) yield ' {}^\n'.format(''.join(caretspace)) From pypy.commits at gmail.com Tue Nov 21 14:42:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 11:42:47 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Force recompilation of _testcapimodule.c (due to ecfbd8f62994) Message-ID: <5a1481b7.0ea6df0a.3586e.5cf6@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93118:382fb81ffff8 Date: 2017-11-21 19:42 +0000 http://bitbucket.org/pypy/pypy/changeset/382fb81ffff8/ Log: Force recompilation of _testcapimodule.c (due to ecfbd8f62994) diff --git a/lib_pypy/_pypy_testcapi.py b/lib_pypy/_pypy_testcapi.py --- a/lib_pypy/_pypy_testcapi.py +++ b/lib_pypy/_pypy_testcapi.py @@ -8,7 +8,8 @@ content = fid.read() # from cffi's Verifier() key = '\x00'.join([sys.version[:3], content]) - key += 'cpyext-gc-support-2' # this branch requires recompilation! + # change the key to force recompilation + key += '2017-11-21' if sys.version_info >= (3,): key = key.encode('utf-8') k1 = hex(binascii.crc32(key[0::2]) & 0xffffffff) From pypy.commits at gmail.com Tue Nov 21 15:09:40 2017 From: pypy.commits at gmail.com (fijal) Date: Tue, 21 Nov 2017 12:09:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: whack at cffi Message-ID: <5a148804.d58bdf0a.8cc33.0be7@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93119:c6537b6d453f Date: 2017-11-21 21:09 +0100 http://bitbucket.org/pypy/pypy/changeset/c6537b6d453f/ Log: whack at cffi diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -7,3 +7,5 @@ * better flag handling in split/splitlines maybe? * encode_error_handler has XXX * remove assertions from W_UnicodeObject.__init__ if all the builders pass +* what to do with error handlers that go backwards. 
There were tests + in test_codecs that would check for that diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1773,6 +1773,13 @@ "characters") return rstring.assert_str0(result) + def convert_arg_to_w_unicode(self, w_obj, strict=None): + # XXX why convert_to_w_unicode does something slightly different? + from pypy.objspace.std.unicodeobject import W_UnicodeObject + assert not hasattr(self, 'is_fake_objspace') + return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + + def realutf8_w(self, w_obj): # Like utf8_w(), but only works if w_obj is really of type # 'unicode'. On Python 3 this is the same as utf8_w(). diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -35,9 +35,7 @@ return raise_unicode_exception_encode def convert_arg_to_w_unicode(space, w_arg, strict=None): - from pypy.objspace.std.unicodeobject import W_UnicodeObject - assert not hasattr(space, 'is_fake_objspace') - return W_UnicodeObject.convert_arg_to_w_unicode(space, w_arg, strict) + return space.convert_arg_to_w_unicode(w_arg) # ____________________________________________________________ diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -63,11 +63,14 @@ return (w_value, len(s) + 1) elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper - u = space.unicode_w(w_value) - if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(u) + w_u = space.convert_arg_to_w_unicode(w_value) + if self.citem.size == 4: + length = w_u._len() else: - length = wchar_helper.unicode_size_as_char32(u) + if not w_u._has_surrogates(): + length = w_u._len() + else: + length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -5,7 +5,7 @@ import sys from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask -from rpython.rlib import jit +from rpython.rlib import jit, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rfficache @@ -40,14 +40,15 @@ return ord(s[0]) def cast_unicode(self, w_ob): + import pdb + pdb.set_trace() space = self.space - s = space.unicode_w(w_ob) - try: - ordinal = wchar_helper.unicode_to_ordinal(s) - except ValueError: + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", - len(s), self.name) + w_u._len(), self.name) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) return intmask(ordinal) def cast(self, w_ob): @@ -175,8 +176,10 @@ def convert_to_object(self, cdata): if self.is_signed_wchar: - unichardata = rffi.cast(rffi.CWCHARP, cdata) - return self.space.newunicode(unichardata[0]) + code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) + return self.space.newutf8( + rutf8.unichr_as_utf8(code), 1, + rutf8.get_flag_from_code(code)) else: value = misc.read_raw_ulong_data(cdata, self.size) # r_uint try: @@ -185,7 +188,8 @@ raise oefmt(self.space.w_ValueError, 
"char32_t out of range for " "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, + rutf8.get_flag_from_code(ord(u))) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -196,16 +200,7 @@ # returns a r_uint. If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - u = space.unicode_w(w_ob) - try: - ordinal = wchar_helper.unicode_to_ordinal(u) - except ValueError: - pass - else: - if self.size == 2 and ordinal > 0xffff: - raise self._convert_error("single character <= 0xFFFF", - w_ob) - return ordinal + return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -91,11 +91,15 @@ from pypy.module._cffi_backend import wchar_helper if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) - s = space.unicode_w(w_ob) - if self.ctitem.size == 2: - n = wchar_helper.unicode_size_as_char16(s) + w_u = space.convert_arg_to_w_unicode(w_ob) + if self.size == 4: + n = w_u._len() else: - n = wchar_helper.unicode_size_as_char32(s) + if not w_u._has_surrogates(): + n = w_u._len() + else: + n = wchar_helper.unicode_size_as_char16(w_u._utf8, + w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " @@ -328,11 +332,12 @@ length = len(s) + 1 elif space.isinstance_w(w_init, space.w_unicode): from pypy.module._cffi_backend import wchar_helper - u = space.unicode_w(w_init) + w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(u) + length = wchar_helper.unicode_size_as_char16(w_u._utf8, + w_u._len()) else: - length = wchar_helper.unicode_size_as_char32(u) + length = w_u._len() length += 1 elif self.is_file: result = self.prepare_file(w_init) diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -1,10 +1,12 @@ +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize +from rpython.rlib.rstring import StringBuilder from rpython.rlib.rarithmetic import r_uint, r_ulonglong, intmask from rpython.rtyper.annlowlevel import llunicode from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = rffi.sizeof(lltype.UniChar) +SIZE_UNICODE = 4 if SIZE_UNICODE == 4: @@ -48,7 +50,7 @@ self.ordinal = ordinal def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2unicode(rffi.cast(rffi.CWCHARP, ptr), length) + return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) if SIZE_UNICODE == 2: @@ -86,7 +88,7 @@ def unicode_from_char16(ptr, length): # 'ptr' is a pointer to 'length' 16-bit integers ptr = rffi.cast(rffi.USHORTP, ptr) - u = [u'\x00'] * length + u = StringBuilder(length) i = 0 j = 0 while j < length: @@ -97,10 +99,9 @@ if 0xDC00 <= ch2 <= 0xDFFF: ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[i] = unichr(ch) + rutf8.unichr_as_utf8_append(u, ch) i += 1 - del u[i:] - return u''.join(u) + return u.build() 
@specialize.ll() @@ -121,23 +122,16 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u): - result = len(u) - if SIZE_UNICODE == 4: - for i in range(result): - if ord(u[i]) > 0xFFFF: - result += 1 +def unicode_size_as_char16(u, len): + result = len + i = 0 + while i < len(u): + code = rutf8.codepoint_at_pos(u, i) + if code > 0xFFFF: + result += 1 + i = rutf8.next_codepoint_pos(u, i) return result -def unicode_size_as_char32(u): - result = len(u) - if SIZE_UNICODE == 2 and result > 1: - for i in range(result - 1): - if is_surrogate(u, i): - result -= 1 - return result - - def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): # 'target_ptr' is a raw pointer to 'target_length' wchars; # we assume here that target_length == len(u). From pypy.commits at gmail.com Tue Nov 21 16:49:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 21 Nov 2017 13:49:59 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Adapt idlelib.CallTips for pypy and update some docstrings Message-ID: <5a149f87.ceb1df0a.8f6ae.257a@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93120:83b96bb9cf44 Date: 2017-11-21 21:49 +0000 http://bitbucket.org/pypy/pypy/changeset/83b96bb9cf44/ Log: Adapt idlelib.CallTips for pypy and update some docstrings diff --git a/lib-python/3/idlelib/CallTips.py b/lib-python/3/idlelib/CallTips.py --- a/lib-python/3/idlelib/CallTips.py +++ b/lib-python/3/idlelib/CallTips.py @@ -123,6 +123,15 @@ _first_param = re.compile('(?<=\()\w*\,?\s*') _default_callable_argspec = "See source or doc" +def _is_user_method(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.MethodType) and + isinstance(ob.__code__, types.CodeType)) + +def _is_user_function(ob): + """Detect user methods on PyPy""" + return (isinstance(ob, types.FunctionType) and + isinstance(ob.__code__, types.CodeType)) def get_argspec(ob): '''Return a string describing the signature of a callable object, or ''. 
@@ -140,21 +149,21 @@ return argspec if isinstance(ob, type): fob = ob.__init__ - elif isinstance(ob_call, types.MethodType): + elif _is_user_method(ob_call): fob = ob_call else: fob = ob if (isinstance(fob, (types.FunctionType, types.MethodType)) and hasattr(fob.__code__, 'co_code')): # PyPy: not on argspec = inspect.formatargspec(*inspect.getfullargspec(fob)) - if (isinstance(ob, (type, types.MethodType)) or - isinstance(ob_call, types.MethodType)): + if (_is_user_method(ob) or _is_user_method(ob_call) or + (isinstance(ob, type) and _is_user_function(fob))): argspec = _first_param.sub("", argspec) lines = (textwrap.wrap(argspec, _MAX_COLS, subsequent_indent=_INDENT) if len(argspec) > _MAX_COLS else [argspec] if argspec else []) - if isinstance(ob_call, types.MethodType): + if _is_user_method(ob_call): doc = ob_call.__doc__ else: doc = getattr(ob, "__doc__", "") diff --git a/lib-python/3/idlelib/idle_test/test_calltips.py b/lib-python/3/idlelib/idle_test/test_calltips.py --- a/lib-python/3/idlelib/idle_test/test_calltips.py +++ b/lib-python/3/idlelib/idle_test/test_calltips.py @@ -63,7 +63,7 @@ gtest([].append, append_doc) gtest(List.append, append_doc) - gtest(types.MethodType, "method(function, instance)") + gtest(types.MethodType, "instancemethod(function, instance, class)") gtest(SB(), default_tip) def test_signature_wrap(self): diff --git a/pypy/module/cpyext/typeobject.py b/pypy/module/cpyext/typeobject.py --- a/pypy/module/cpyext/typeobject.py +++ b/pypy/module/cpyext/typeobject.py @@ -400,11 +400,13 @@ lltype.render_immortal(ptr.c_ml_name) rffi.setintfield(ptr, 'c_ml_flags', METH_VARARGS | METH_KEYWORDS) ptr.c_ml_doc = rffi.cast(rffi.CONST_CCHARP, rffi.str2charp( - "T.__new__(S, ...) -> a new object with type S, a subtype of T")) + "Create and return a new object. " + "See help(type) for accurate signature.")) lltype.render_immortal(ptr.c_ml_doc) state.new_method_def = ptr return ptr + def setup_new_method_def(space): ptr = get_new_method_def(space) ptr.c_ml_meth = rffi.cast(PyCFunction, llslot(space, tp_new_wrapper)) diff --git a/pypy/objspace/std/boolobject.py b/pypy/objspace/std/boolobject.py --- a/pypy/objspace/std/boolobject.py +++ b/pypy/objspace/std/boolobject.py @@ -40,7 +40,7 @@ @staticmethod @unwrap_spec(w_obj=WrappedDefault(False)) def descr_new(space, w_booltype, w_obj): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." space.w_bool.check_user_subclass(w_booltype) return space.newbool(space.is_true(w_obj)) diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py --- a/pypy/objspace/std/bytearrayobject.py +++ b/pypy/objspace/std/bytearrayobject.py @@ -658,7 +658,7 @@ """x.__imul__(y) <==> x*=y""" def __init__(): - """x.__init__(...) initializes x; see help(type(x)) for signature""" + """Initialize self. See help(type(self)) for accurate signature.""" def __iter__(): """x.__iter__() <==> iter(x)""" diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py --- a/pypy/objspace/std/intobject.py +++ b/pypy/objspace/std/intobject.py @@ -514,7 +514,7 @@ @staticmethod @unwrap_spec(w_x=WrappedDefault(0)) def descr_new(space, w_inttype, w_x, w_base=None): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." 
return _new_int(space, w_inttype, w_x, w_base) def descr_hash(self, space): diff --git a/pypy/objspace/std/listobject.py b/pypy/objspace/std/listobject.py --- a/pypy/objspace/std/listobject.py +++ b/pypy/objspace/std/listobject.py @@ -301,7 +301,7 @@ return self.strategy.find(self, w_item, start, end) def append(self, w_item): - """L.append(object) -- append object to end""" + """L.append(object) -> None -- append object to end""" self.strategy.append(self, w_item) def length(self): @@ -403,8 +403,7 @@ self.strategy.insert(self, index, w_item) def extend(self, w_iterable): - '''L.extend(iterable) -- extend list by appending - elements from the iterable''' + '''L.extend(iterable) -- extend list by appending elements from the iterable''' self.strategy.extend(self, w_iterable) def reverse(self): @@ -420,13 +419,13 @@ @staticmethod def descr_new(space, w_listtype, __args__): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." w_obj = space.allocate_instance(W_ListObject, w_listtype) w_obj.clear(space) return w_obj def descr_init(self, space, __args__): - """x.__init__(...) initializes x; see help(type(x)) for signature""" + """Initialize self. See help(type(self)) for accurate signature.""" # this is on the silly side w_iterable, = __args__.parse_obj( None, 'list', init_signature, init_defaults) @@ -603,8 +602,7 @@ self.reverse() def descr_count(self, space, w_value): - '''L.count(value) -> integer -- return number of - occurrences of value''' + '''L.count(value) -> integer -- return number of occurrences of value''' # needs to be safe against eq_w() mutating the w_list behind our back count = 0 i = 0 @@ -623,8 +621,8 @@ @unwrap_spec(index=int) def descr_pop(self, space, index=-1): - '''L.pop([index]) -> item -- remove and return item at - index (default last)''' + """L.pop([index]) -> item -- remove and return item at index (default last). +Raises IndexError if list is empty or index is out of range.""" length = self.length() if length == 0: raise oefmt(space.w_IndexError, "pop from empty list") @@ -639,7 +637,7 @@ raise oefmt(space.w_IndexError, "pop index out of range") def descr_clear(self, space): - '''L.clear() -- remove all items''' + """L.clear() -> None -- remove all items from L""" self.clear(space) def descr_copy(self, space): @@ -647,7 +645,8 @@ return self.clone() def descr_remove(self, space, w_value): - 'L.remove(value) -- remove first occurrence of value' + """L.remove(value) -> None -- remove first occurrence of value. +Raises ValueError if the value is not present.""" # needs to be safe against eq_w() mutating the w_list behind our back try: i = self.find(w_value, 0, sys.maxint) @@ -659,8 +658,8 @@ @unwrap_spec(w_start=WrappedDefault(0), w_stop=WrappedDefault(sys.maxint)) def descr_index(self, space, w_value, w_start, w_stop): - '''L.index(value, [start, [stop]]) -> integer -- return - first index of value''' + """L.index(value, [start, [stop]]) -> integer -- return first index of value. 
+Raises ValueError if the value is not present.""" # needs to be safe against eq_w() mutating the w_list behind our back size = self.length() i, stop = unwrap_start_stop(space, size, w_start, w_stop) @@ -673,8 +672,7 @@ @unwrap_spec(reverse=int) def descr_sort(self, space, w_key=None, reverse=False): - """ L.sort(key=None, reverse=False) -- stable - sort *IN PLACE*""" + """L.sort(key=None, reverse=False) -> None -- stable sort *IN PLACE*""" has_key = not space.is_none(w_key) # create and setup a TimSort instance diff --git a/pypy/objspace/std/noneobject.py b/pypy/objspace/std/noneobject.py --- a/pypy/objspace/std/noneobject.py +++ b/pypy/objspace/std/noneobject.py @@ -9,7 +9,7 @@ @staticmethod def descr_new(space, w_type): - """T.__new__(S, ...) -> a new object with type S, a subtype of T""" + "Create and return a new object. See help(type) for accurate signature." return space.w_None def descr_bool(self, space): diff --git a/pypy/objspace/std/test/test_listobject.py b/pypy/objspace/std/test/test_listobject.py --- a/pypy/objspace/std/test/test_listobject.py +++ b/pypy/objspace/std/test/test_listobject.py @@ -445,8 +445,8 @@ def test_doc(self): assert list.__doc__ == "list() -> new empty list\nlist(iterable) -> new list initialized from iterable's items" - assert list.__new__.__doc__ == "T.__new__(S, ...) -> a new object with type S, a subtype of T" - assert list.__init__.__doc__ == "x.__init__(...) initializes x; see help(type(x)) for signature" + assert list.__new__.__doc__ == "Create and return a new object. See help(type) for accurate signature." + assert list.__init__.__doc__ == "Initialize self. See help(type(self)) for accurate signature." def test_getstrategyfromlist_w(self): l0 = ["a", "2", "a", True] From pypy.commits at gmail.com Tue Nov 21 18:57:52 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 21 Nov 2017 15:57:52 -0800 (PST) Subject: [pypy-commit] pypy default: update vmprof from upstream which cleans up most of the gcc warnings Message-ID: <5a14bd80.8e8bdf0a.78af8.e0f4@mx.google.com> Author: Matti Picus Branch: Changeset: r93121:2c9ec695ca2c Date: 2017-11-22 01:56 +0200 http://bitbucket.org/pypy/pypy/changeset/2c9ec695ca2c/ Log: update vmprof from upstream which cleans up most of the gcc warnings diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -12,6 +12,7 @@ #endif +#include "vmprof_common.h" #include "shared/vmprof_get_custom_offset.h" #ifdef VMPROF_UNIX @@ -30,7 +31,7 @@ } #endif -long vmprof_get_profile_path(const char * buffer, long size) +long vmprof_get_profile_path(char * buffer, long size) { return vmp_fd_to_path(vmp_profile_fileno(), buffer, size); } diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -36,8 +36,8 @@ RPY_EXTERN int vmprof_stack_append(void*, long); RPY_EXTERN long vmprof_stack_pop(void*); RPY_EXTERN void vmprof_stack_free(void*); -RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, intptr_t*, intptr_t); -RPY_EXTERN long vmprof_get_profile_path(const char *, long); +RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, void**, intptr_t); +RPY_EXTERN long vmprof_get_profile_path(char *, long); RPY_EXTERN int vmprof_stop_sampling(void); RPY_EXTERN void vmprof_start_sampling(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- 
a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -262,7 +262,7 @@ } int depth = 0; - PY_STACK_FRAME_T * top_most_frame = frame; + //PY_STACK_FRAME_T * top_most_frame = frame; while ((depth + _per_loop()) <= max_depth) { unw_get_proc_info(&cursor, &pip); @@ -400,7 +400,7 @@ if (fd == NULL) { return 0; } - char * saveptr; + char * saveptr = NULL; char * line = NULL; char * he = NULL; char * name; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -4,6 +4,9 @@ #include #ifdef RPYTHON_VMPROF + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc); + #ifdef RPYTHON_LL2CTYPES /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */ @@ -193,7 +196,7 @@ #endif intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length) + void **result_p, intptr_t result_length) { int n; int enabled; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -96,7 +96,7 @@ #endif RPY_EXTERN intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length); + void **result_p, intptr_t result_length); #endif int vmprof_get_signal_type(void); diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,8 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + eci = ExternalCompilationInfo(compile_extra=['-g','-O0', '-Werror'], + post_include_bits = ['int native_func(int);'], separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Wed Nov 22 13:32:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 10:32:16 -0800 (PST) Subject: [pypy-commit] pypy py3.5: io.BufferedRandom also uses the new readinto() implementation Message-ID: <5a15c2b0.d1911c0a.8150a.016b@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93122:70b9b696e219 Date: 2017-11-22 18:32 +0000 http://bitbucket.org/pypy/pypy/changeset/70b9b696e219/ Log: io.BufferedRandom also uses the new readinto() implementation diff --git a/pypy/module/_io/interp_bufferedio.py b/pypy/module/_io/interp_bufferedio.py --- a/pypy/module/_io/interp_bufferedio.py +++ b/pypy/module/_io/interp_bufferedio.py @@ -869,19 +869,14 @@ finally: self._reader_reset_buf() -class W_BufferedReader(BufferedMixin, W_BufferedIOBase): - @unwrap_spec(buffer_size=int) - def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): - self.state = STATE_ZERO - check_readable_w(space, w_raw) +class BufferedReaderMixin(BufferedMixin): + _mixin_ = True - self.w_raw = w_raw - self.buffer_size = buffer_size - self.readable = True + def readinto_w(self, space, w_buffer): + return self._readinto(space, w_buffer, read_once=False) - self._init(space) - self._reader_reset_buf() - self.state = STATE_OK + def readinto1_w(self, space, w_buffer): + return self._readinto(space, w_buffer, read_once=True) def _readinto(self, space, w_buffer, read_once): rwbuffer = space.writebuf_w(w_buffer) @@ -904,7 +899,8 @@ self.pos = 0 if 
written + len(self.buffer) < length: try: - got = self._raw_read(space, rwbuffer, written, length - written) + got = self._raw_read( + space, rwbuffer, written, length - written) written += got except BlockingIOError: got = 0 @@ -929,6 +925,19 @@ return space.newint(written) +class W_BufferedReader(BufferedReaderMixin, W_BufferedIOBase): + @unwrap_spec(buffer_size=int) + def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): + self.state = STATE_ZERO + check_readable_w(space, w_raw) + + self.w_raw = w_raw + self.buffer_size = buffer_size + self.readable = True + + self._init(space) + self._reader_reset_buf() + self.state = STATE_OK W_BufferedReader.typedef = TypeDef( '_io.BufferedReader', W_BufferedIOBase.typedef, @@ -939,6 +948,8 @@ read = interp2app(W_BufferedReader.read_w), peek = interp2app(W_BufferedReader.peek_w), read1 = interp2app(W_BufferedReader.read1_w), + readinto = interp2app(W_BufferedReader.readinto_w), + readinto1 = interp2app(W_BufferedReader.readinto1_w), raw = interp_attrproperty_w("w_raw", cls=W_BufferedReader), readline = interp2app(W_BufferedReader.readline_w), @@ -1100,7 +1111,7 @@ **methods ) -class W_BufferedRandom(BufferedMixin, W_BufferedIOBase): +class W_BufferedRandom(BufferedReaderMixin, W_BufferedIOBase): @unwrap_spec(buffer_size=int) def descr_init(self, space, w_raw, buffer_size=DEFAULT_BUFFER_SIZE): self.state = STATE_ZERO @@ -1128,6 +1139,8 @@ peek = interp2app(W_BufferedRandom.peek_w), read1 = interp2app(W_BufferedRandom.read1_w), readline = interp2app(W_BufferedRandom.readline_w), + readinto = interp2app(W_BufferedRandom.readinto_w), + readinto1 = interp2app(W_BufferedRandom.readinto1_w), write = interp2app(W_BufferedRandom.write_w), flush = interp2app(W_BufferedRandom.flush_w), diff --git a/pypy/module/_io/test/test_bufferedio.py b/pypy/module/_io/test/test_bufferedio.py --- a/pypy/module/_io/test/test_bufferedio.py +++ b/pypy/module/_io/test/test_bufferedio.py @@ -199,6 +199,19 @@ def readinto(self, buf): buf[:3] = b"abc" return 3 + + def writable(self): + return True + + def write(self, b): + return len(b) + + def seekable(self): + return True + + def seek(self, pos, whence): + return 0 + bufio = _io.BufferedReader(MockIO(), buffer_size=5) buf = bytearray(10) bufio.read(2) @@ -223,6 +236,15 @@ assert n == 1 assert buf[:n] == b'c' + bufio = _io.BufferedRandom(MockIO(), buffer_size=10) + buf = bytearray(20) + bufio.peek(3) + assert bufio.readinto1(buf) == 6 + assert buf[:6] == b'abcabc' + + bufio = _io.BufferedWriter(MockIO(), buffer_size=10) + raises(_io.UnsupportedOperation, bufio.readinto1, bytearray(10)) + def test_seek(self): import _io raw = _io.FileIO(self.tmpfile) From pypy.commits at gmail.com Wed Nov 22 16:04:45 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:04:45 -0800 (PST) Subject: [pypy-commit] buildbot default: use os tools to clean out old virtualenv (untested) Message-ID: <5a15e66d.923e1c0a.75bd2.001d@mx.google.com> Author: Matti Picus Branch: Changeset: r1039:0b37e98f8694 Date: 2017-11-08 03:00 +0200 http://bitbucket.org/pypy/buildbot/changeset/0b37e98f8694/ Log: use os tools to clean out old virtualenv (untested) diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -467,16 +467,23 @@ timeout=4000, env={"TMPDIR": Interpolate('%(prop:target_tmpdir)s' + pytest), })) + if platform == 'win32': + virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' + clean = 'rmdir /s /q pypy-venv' + else: + virt_pypy = 
'../venv/pypy-venv/bin/python' + clean = 'rm -rf pypy-env' + factory.addStep(ShellCmd( + description="clean old virtualenv", + command=clean, + workdir='venv', + haltOnFailure=False)) factory.addStep(ShellCmd( description="Create virtualenv", command=prefix + ['virtualenv', '--clear', '-p', Property('target_path'), 'pypy-venv'], workdir='venv', flunkOnFailure=True)) - if platform == 'win32': - virt_pypy = r'..\venv\pypy-venv\Scripts\python.exe' - else: - virt_pypy = '../venv/pypy-venv/bin/python' factory.addStep(ShellCmd( description="Install extra tests requirements", command=prefix + [virt_pypy, '-m', 'pip', 'install', From pypy.commits at gmail.com Wed Nov 22 16:04:48 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:04:48 -0800 (PST) Subject: [pypy-commit] buildbot default: use upgraded virtualenv for pypy -A tests, clean out old virtualenv's Message-ID: <5a15e670.57addf0a.5fc48.c014@mx.google.com> Author: Matti Picus Branch: Changeset: r1040:be8418f6ed85 Date: 2017-11-22 23:03 +0200 http://bitbucket.org/pypy/buildbot/changeset/be8418f6ed85/ Log: use upgraded virtualenv for pypy -A tests, clean out old virtualenv's diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -473,15 +473,21 @@ else: virt_pypy = '../venv/pypy-venv/bin/python' clean = 'rm -rf pypy-env' + target = Property('target_path') factory.addStep(ShellCmd( description="clean old virtualenv", command=clean, workdir='venv', haltOnFailure=False)) factory.addStep(ShellCmd( + description="Install recent virtualenv", + command=prefix + [target, '-mpip', 'install', '--upgrade', + 'virtualenv'], + workdir='venv', + flunkOnFailure=True)) + factory.addStep(ShellCmd( description="Create virtualenv", - command=prefix + ['virtualenv', '--clear', '-p', - Property('target_path'), 'pypy-venv'], + command=prefix + [target, '-mvirtualenv', '--clear', 'pypy-venv'], workdir='venv', flunkOnFailure=True)) factory.addStep(ShellCmd( @@ -555,17 +561,22 @@ haltOnFailure=False, )) + if platform == 'win32': + self.virt_python = r'virt_test\Scripts\python.exe' + clean = 'rmdir /s /q virt-test' + else: + self.virt_python = 'virt_test/bin/python' + clean = 'rm -rf virt-test' + self.addStep(ShellCmd( + description="clean old virtualenv", + command=clean, + haltOnFailure=False)) self.addStep(ShellCmd( description="create virtualenv for tests", command=['virtualenv', 'virt_test'], haltOnFailure=True, )) - if platform == 'win32': - self.virt_python = r'virt_test\Scripts\python.exe' - else: - self.virt_python = 'virt_test/bin/python' - self.addStep(ShellCmd( description="install requirments to virtual environment", command=[self.virt_python, '-mpip', 'install', '-r', From pypy.commits at gmail.com Wed Nov 22 16:10:29 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 22 Nov 2017 13:10:29 -0800 (PST) Subject: [pypy-commit] buildbot default: typo (arigato) Message-ID: <5a15e7c5.8faedf0a.ec3e7.a90a@mx.google.com> Author: Matti Picus Branch: Changeset: r1041:b9268dadd68a Date: 2017-11-22 23:10 +0200 http://bitbucket.org/pypy/buildbot/changeset/b9268dadd68a/ Log: typo (arigato) diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -472,7 +472,7 @@ clean = 'rmdir /s /q pypy-venv' else: virt_pypy = '../venv/pypy-venv/bin/python' - clean = 'rm -rf pypy-env' + clean = 'rm -rf pypy-venv' target = Property('target_path') factory.addStep(ShellCmd( description="clean 
old virtualenv", From pypy.commits at gmail.com Wed Nov 22 17:22:26 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 22 Nov 2017 14:22:26 -0800 (PST) Subject: [pypy-commit] pypy default: Untested and hard-to-test, but for symmetry reasons if we Message-ID: <5a15f8a2.b198df0a.c8d51.0f43@mx.google.com> Author: Armin Rigo Branch: Changeset: r93123:386b50664e3e Date: 2017-11-22 23:21 +0100 http://bitbucket.org/pypy/pypy/changeset/386b50664e3e/ Log: Untested and hard-to-test, but for symmetry reasons if we don't call start_sampling() here then it means sampling will not restart after some switches diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -26,12 +26,14 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) + rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: cintf.empty_rvmprof_stack() h = self._gcrootfinder.new(self, callback, arg) finally: cintf.restore_rvmprof_stack(x) + rvmprof.start_sampling() if DEBUG: debug.add(h) return h From pypy.commits at gmail.com Wed Nov 22 17:44:28 2017 From: pypy.commits at gmail.com (arigo) Date: Wed, 22 Nov 2017 14:44:28 -0800 (PST) Subject: [pypy-commit] pypy default: * Be more careful and let stop_sampling()/start_sampling() be called in code Message-ID: <5a15fdcc.48d31c0a.f36c0.b5b8@mx.google.com> Author: Armin Rigo Branch: Changeset: r93124:1cc101a9ee5a Date: 2017-11-22 23:43 +0100 http://bitbucket.org/pypy/pypy/changeset/1cc101a9ee5a/ Log: * Be more careful and let stop_sampling()/start_sampling() be called in code that is not compiled with rvmprof. This is needed from rstacklet; previously, it would fail translation on any non-rvmprof- supported platform as soon as rstacklet is used. * We already call a function from vmprof in rstacklet.py. No point in calling another one, when we can make the function have both effects. 
diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,6 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -26,14 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: cintf.empty_rvmprof_stack() h = self._gcrootfinder.new(self, callback, arg) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h @@ -43,13 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,8 +56,10 @@ return None def stop_sampling(): - fd = _get_vmprof().cintf.vmprof_stop_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling + fd = vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) def start_sampling(): - _get_vmprof().cintf.vmprof_start_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_start_sampling + vmprof_start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -40,7 +40,7 @@ compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_LINUX'] elif sys.platform == 'win32': - compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] + compile_extra += ['-DVMPROF_WINDOWS'] separate_module_files = [SHARED.join('vmprof_win.c')] _libs = [] else: @@ -120,16 +120,26 @@ vmprof_get_profile_path = rffi.llexternal("vmprof_get_profile_path", [rffi.CCHARP, lltype.Signed], lltype.Signed, compilation_info=eci, _nowrapper=True) - vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=eci, - _nowrapper=True) - vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=eci, - _nowrapper=True) return CInterface(locals()) +# this is always present, but compiles to no-op if RPYTHON_VMPROF is not +# defined (i.e. 
if we don't actually use vmprof in the generated C) +auto_eci = ExternalCompilationInfo(post_include_bits=[""" +#ifndef RPYTHON_VMPROF +# define vmprof_stop_sampling() (-1) +# define vmprof_start_sampling() ((void)0) +#endif +"""]) + +vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=auto_eci, + _nowrapper=True) +vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=auto_eci, + _nowrapper=True) + class CInterface(object): def __init__(self, namespace): @@ -218,6 +228,7 @@ # stacklet support def save_rvmprof_stack(): + vmprof_stop_sampling() return vmprof_tl_stack.get_or_make_raw() def empty_rvmprof_stack(): @@ -225,6 +236,7 @@ def restore_rvmprof_stack(x): vmprof_tl_stack.setraw(x) + vmprof_start_sampling() # # traceback support From pypy.commits at gmail.com Wed Nov 22 17:46:29 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 14:46:29 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Prevent test from crashing for an unrelated reason Message-ID: <5a15fe45.968ddf0a.433a7.de3f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93125:9a354884fd09 Date: 2017-11-22 22:46 +0000 http://bitbucket.org/pypy/pypy/changeset/9a354884fd09/ Log: Prevent test from crashing for an unrelated reason diff --git a/lib-python/3/test/test_descr.py b/lib-python/3/test/test_descr.py --- a/lib-python/3/test/test_descr.py +++ b/lib-python/3/test/test_descr.py @@ -4278,7 +4278,10 @@ c = C() c.__dict__[Evil()] = 0 - self.assertEqual(c.attr, 1) + try: + self.assertEqual(c.attr, 1) + except AttributeError: # when Evil.__eq__ is called twice + pass # this makes a crash more likely: support.gc_collect() self.assertNotHasAttr(c, 'attr') From pypy.commits at gmail.com Wed Nov 22 17:50:48 2017 From: pypy.commits at gmail.com (fijal) Date: Wed, 22 Nov 2017 14:50:48 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: in progress io Message-ID: <5a15ff48.02b8df0a.6f8fa.3148@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93126:559a0a0bb302 Date: 2017-11-22 23:50 +0100 http://bitbucket.org/pypy/pypy/changeset/559a0a0bb302/ Log: in progress io diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1779,6 +1779,9 @@ assert not hasattr(self, 'is_fake_objspace') return W_UnicodeObject.convert_arg_to_w_unicode(self, w_obj, strict) + def utf8_len_w(self, w_obj): + w_obj = self.convert_arg_to_w_unicode(w_obj) + return w_obj._utf8, w_obj._len() def realutf8_w(self, w_obj): # Like utf8_w(), but only works if w_obj is really of type diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -10,7 +10,8 @@ from pypy.module._io.interp_iobase import W_IOBase, convert_size, trap_eintr from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint -from rpython.rlib.rstring import UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -29,17 +30,22 @@ def __init__(self, space): self.w_newlines_dict = { - SEEN_CR: space.newunicode(u"\r"), - SEEN_LF: space.newunicode(u"\n"), - SEEN_CRLF: space.newunicode(u"\r\n"), + SEEN_CR: space.newutf8("\r", 1, FLAG_ASCII), + SEEN_LF: space.newutf8("\n", 1, FLAG_ASCII), + SEEN_CRLF: space.newutf8("\r\n", 2, 
FLAG_ASCII), SEEN_CR | SEEN_LF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\n", 1, FLAG_ASCII)]), SEEN_CR | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\r\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\n"), space.newunicode(u"\r\n")]), + [space.newutf8("\n", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), SEEN_CR | SEEN_LF | SEEN_CRLF: space.newtuple( - [space.newunicode(u"\r"), space.newunicode(u"\n"), space.newunicode(u"\r\n")]), + [space.newutf8("\r", 1, FLAG_ASCII), + space.newutf8("\n", 1, FLAG_ASCII), + space.newutf8("\r\n", 2, FLAG_ASCII)]), } @unwrap_spec(translate=int) @@ -73,25 +79,25 @@ raise oefmt(space.w_TypeError, "decoder should return a string result") - output = space.unicode_w(w_output) + output, output_len = space.utf8_len_w(w_output) output_len = len(output) if self.pendingcr and (final or output_len): - output = u'\r' + output + output = '\r' + output self.pendingcr = False output_len += 1 # retain last \r even when not translating data: # then readline() is sure to get \r\n in one pass if not final and output_len > 0: - last = output_len - 1 + last = len(output) - 1 assert last >= 0 - if output[last] == u'\r': + if output[last] == '\r': output = output[:last] self.pendingcr = True output_len -= 1 if output_len == 0: - return space.newunicode(u"") + return space.newutf8("", 1, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -101,52 +107,53 @@ # for the \r only_lf = False if seennl == SEEN_LF or seennl == 0: - only_lf = (output.find(u'\r') < 0) + only_lf = (output.find('\r') < 0) if only_lf: # If not already seen, quick scan for a possible "\n" character. # (there's nothing else to be done, even when in translation mode) - if seennl == 0 and output.find(u'\n') >= 0: + if seennl == 0 and output.find('\n') >= 0: seennl |= SEEN_LF # Finished: we have scanned for newlines, and none of them # need translating. elif not self.translate: i = 0 - while i < output_len: + while i < len(output): if seennl == SEEN_ALL: break c = output[i] i += 1 - if c == u'\n': + if c == '\n': seennl |= SEEN_LF - elif c == u'\r': - if i < output_len and output[i] == u'\n': + elif c == '\r': + if i < len(output) and output[i] == '\n': seennl |= SEEN_CRLF i += 1 else: seennl |= SEEN_CR - elif output.find(u'\r') >= 0: + elif output.find('\r') >= 0: # Translate! 
- builder = UnicodeBuilder(output_len) + builder = StringBuilder(len(output)) i = 0 while i < output_len: c = output[i] i += 1 - if c == u'\n': + if c == '\n': seennl |= SEEN_LF - elif c == u'\r': - if i < output_len and output[i] == u'\n': + elif c == '\r': + if i < len(output) and output[i] == '\n': seennl |= SEEN_CRLF i += 1 else: seennl |= SEEN_CR - builder.append(u'\n') + builder.append('\n') continue builder.append(c) output = builder.build() self.seennl |= seennl - return space.newunicode(output) + lgt, flag = check_utf8(output, True) + return space.newutf8(output, lgt, flag) def reset_w(self, space): self.seennl = 0 @@ -373,8 +380,8 @@ if space.is_none(w_newline): newline = None else: - newline = space.unicode_w(w_newline) - if newline and newline not in (u'\n', u'\r\n', u'\r'): + newline = space.utf8_w(w_newline) + if newline and newline not in ('\n', '\r\n', '\r'): raise oefmt(space.w_ValueError, "illegal newline value: %R", w_newline) @@ -384,13 +391,13 @@ self.readtranslate = newline is None self.readnl = newline - self.writetranslate = (newline != u'') + self.writetranslate = (newline != '') if not self.readuniversal: self.writenl = self.readnl - if self.writenl == u'\n': + if self.writenl == '\n': self.writenl = None elif _WINDOWS: - self.writenl = u"\r\n" + self.writenl = "\r\n" else: self.writenl = None @@ -519,7 +526,7 @@ def _get_decoded_chars(self, size): if self.decoded_chars is None: - return u"" + return "" available = len(self.decoded_chars) - self.decoded_chars_used if size < 0 or size > available: @@ -574,7 +581,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded_chars(space.utf8_w(w_decoded)) if space.len_w(w_decoded) > 0: eof = False @@ -745,20 +752,19 @@ raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_text) - text = space.unicode_w(w_text) - textlen = len(text) + text, textlen = space.utf8_len_w(w_text) haslf = False if (self.writetranslate and self.writenl) or self.line_buffering: - if text.find(u'\n') >= 0: + if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), space.newunicode(self.writenl)) - text = space.unicode_w(w_text) + text = space.utf8_w(w_text) needflush = False - if self.line_buffering and (haslf or text.find(u'\r') >= 0): + if self.line_buffering and (haslf or text.find('\r') >= 0): needflush = True # XXX What if we were just reading? From pypy.commits at gmail.com Wed Nov 22 23:16:09 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 20:16:09 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Do some unicode>utf8 conversions in interp_textio Message-ID: <5a164b89.169a1c0a.63814.9b51@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93128:a8f461710bf8 Date: 2017-11-23 03:34 +0000 http://bitbucket.org/pypy/pypy/changeset/a8f461710bf8/ Log: Do some unicode>utf8 conversions in interp_textio diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -97,7 +97,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 1, FLAG_ASCII) + return space.newutf8("", 0, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. 
@@ -226,7 +226,7 @@ if self.readtranslate: # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) + pos = line.find('\n', start, end) if pos >= 0: return pos - start + 1, 0 else: @@ -617,13 +617,13 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self._get_decoded_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = UnicodeBuilder(size) + builder = StringBuilder(size) # Keep reading chunks until we have n characters to return while True: @@ -643,7 +643,7 @@ continue raise - return space.newunicode(builder.build()) + return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -731,12 +731,12 @@ if chunks: if line: chunks.append(line) - line = u''.join(chunks) + line = ''.join(chunks) if line: - return space.newunicode(line) + return space.new_from_utf8(line) else: - return space.newunicode(u'') + return space.newutf8('', 0, FLAG_ASCII) # _____________________________________________________________ # write methods @@ -759,8 +759,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), - space.newunicode(self.writenl)) + w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), + space.new_from_utf8(self.writenl)) text = space.utf8_w(w_text) needflush = False @@ -982,7 +982,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 From pypy.commits at gmail.com Wed Nov 22 23:16:07 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 20:16:07 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Add (back) convenience methods space.newunicode(), space.new_from_utf8() and Message-ID: <5a164b87.83b91c0a.f6cd7.8e07@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93127:b89046216269 Date: 2017-11-23 03:14 +0000 http://bitbucket.org/pypy/pypy/changeset/b89046216269/ Log: Add (back) convenience methods space.newunicode(), space.new_from_utf8() and space.unicode_w() diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -272,7 +272,7 @@ self._typed_unwrap_error(space, "unicode") def convert_to_w_unicode(self, space): - self._typed_unwrap_error(space, "unicode") + self._typed_unwrap_error(space, "unicode") def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -1759,6 +1759,11 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + + def unicode_w(self, w_obj): + # XXX: kill me! + return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! 
+ assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! + assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Thu Nov 23 00:14:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 21:14:51 -0800 (PST) Subject: [pypy-commit] pypy default: Refactor interp_textio.py a little Message-ID: <5a16594b.8b8a1c0a.47f9e.aaf0@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93129:6eab39056eb5 Date: 2017-11-23 05:14 +0000 http://bitbucket.org/pypy/pypy/changeset/6eab39056eb5/ Log: Refactor interp_textio.py a little diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -217,30 +217,28 @@ def _find_line_ending(self, line, start, end): size = end - start if self.readtranslate: - # Newlines are already translated, only search for \n pos = line.find(u'\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -248,7 +246,7 @@ # Non-universal mode. 
pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -513,8 +511,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.unicode_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -573,8 +576,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -664,7 +666,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -683,7 +685,6 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -709,7 +710,7 @@ remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -861,7 +862,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -886,7 +887,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. 
@@ -907,8 +908,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -976,7 +976,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 From pypy.commits at gmail.com Thu Nov 23 01:06:45 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:06:45 -0800 (PST) Subject: [pypy-commit] pypy default: Use a UnicodeBuilder in _io.TextIOWrapper.readline Message-ID: <5a166575.103e1c0a.4e643.533d@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93130:870515a86876 Date: 2017-11-23 06:06 +0000 http://bitbucket.org/pypy/pypy/changeset/870515a86876/ Log: Use a UnicodeBuilder in _io.TextIOWrapper.readline diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -646,11 +646,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = UnicodeBuilder() while True: # First, get some data if necessary @@ -684,6 +683,7 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked @@ -702,8 +702,8 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: @@ -719,18 +719,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return space.newunicode(result) # _____________________________________________________________ # write methods From pypy.commits at gmail.com Thu Nov 23 01:42:30 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:42:30 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Refactor interp_textio.py a little Message-ID: <5a166dd6.22a8df0a.e5ef.d731@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93131:031e80f0a68e Date: 2017-11-23 05:14 +0000 http://bitbucket.org/pypy/pypy/changeset/031e80f0a68e/ Log: Refactor interp_textio.py a little diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -224,30 +224,28 @@ def _find_line_ending(self, line, start, end): size = end 
- start if self.readtranslate: - # Newlines are already translated, only search for \n pos = line.find('\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -255,7 +253,7 @@ # Non-universal mode. pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -520,8 +518,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.utf8_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -580,8 +583,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.utf8_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -671,7 +673,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -690,7 +692,6 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -716,7 +717,7 @@ remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -867,7 +868,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -892,7 +893,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. 
@@ -913,8 +914,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: From pypy.commits at gmail.com Thu Nov 23 01:42:32 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 22 Nov 2017 22:42:32 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Use a UnicodeBuilder in _io.TextIOWrapper.readline Message-ID: <5a166dd8.0eef1c0a.3a2c3.08c2@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93132:8c2553a25336 Date: 2017-11-23 06:06 +0000 http://bitbucket.org/pypy/pypy/changeset/8c2553a25336/ Log: Use a UnicodeBuilder in _io.TextIOWrapper.readline diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -653,11 +653,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = StringBuilder() while True: # First, get some data if necessary @@ -691,6 +690,7 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked @@ -709,8 +709,8 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: @@ -726,18 +726,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = ''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.new_from_utf8(line) - else: - return space.newutf8('', 0, FLAG_ASCII) + result = builder.build() + return space.new_from_utf8(result) # _____________________________________________________________ # write methods From pypy.commits at gmail.com Thu Nov 23 04:27:49 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:27:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Tweak the unicode FLAG_xx values for performance; collapse two identical helpers; move combine_flags() to rutf8 Message-ID: <5a169495.54d91c0a.8efdd.63ae@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93133:a1cf21d7a124 Date: 2017-11-23 10:24 +0100 http://bitbucket.org/pypy/pypy/changeset/a1cf21d7a124/ Log: Tweak the unicode FLAG_xx values for performance; collapse two identical helpers; move combine_flags() to rutf8 diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -3,6 +3,7 @@ from pypy.interpreter.error import OperationError from rpython.rlib.objectmodel import specialize from rpython.rlib import rutf8 +from rpython.rlib.rutf8 import combine_flags from rpython.rlib.rarithmetic import r_uint, intmask from rpython.rlib.rstring import StringBuilder from pypy.module._codecs import interp_codecs @@ -43,14 +44,6 @@ from pypy.objspace.std.unicodeobject import encode_object return encode_object(space, w_data, 
encoding, errors) -def combine_flags(one, two): - if one == rutf8.FLAG_ASCII and two == rutf8.FLAG_ASCII: - return rutf8.FLAG_ASCII - elif (one == rutf8.FLAG_HAS_SURROGATES or - two == rutf8.FLAG_HAS_SURROGATES): - return rutf8.FLAG_HAS_SURROGATES - return rutf8.FLAG_REGULAR - def _has_surrogate(u): for c in u: @@ -788,7 +781,8 @@ # first surrogate surrogate = outCh else: - flag = combine_flags(flag, rutf8.unichr_to_flag(outCh)) + flag = combine_flags(flag, + rutf8.get_flag_from_code(outCh)) outsize += 1 assert outCh >= 0 rutf8.unichr_as_utf8_append(result, outCh, True) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -356,7 +356,7 @@ elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) return W_UnicodeObject(builder.build(), self._length, flag) @@ -381,7 +381,7 @@ else: ch = unicodedb.tolower(ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, ch) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -407,7 +407,7 @@ codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): result.append(w_newval._utf8) - flag = unicodehelper.combine_flags(flag, w_newval._get_flag()) + flag = rutf8.combine_flags(flag, w_newval._get_flag()) result_length += w_newval._length continue else: @@ -416,7 +416,7 @@ "or unicode") try: if codepoint >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(result, codepoint, allow_surrogates=True) result_length += 1 @@ -540,7 +540,7 @@ while pos < len(self._utf8): lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -642,7 +642,7 @@ if e.match(space, space.w_TypeError): return space.w_NotImplemented raise - flag = unicodehelper.combine_flags(self._get_flag(), w_other._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_other._get_flag()) return W_UnicodeObject(self._utf8 + w_other._utf8, self._len() + w_other._len(), flag) @@ -667,7 +667,7 @@ # XXX Maybe the extra copy here is okay? 
It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) - flag = unicodehelper.combine_flags(flag, w_u._get_flag()) + flag = rutf8.combine_flags(flag, w_u._get_flag()) unwrapped.append(w_u._utf8) lgt += w_u._length prealloc_size += len(unwrapped[i]) @@ -719,7 +719,7 @@ uchar = rutf8.codepoint_at_pos(value, i) uchar = unicodedb.toupper(uchar) if uchar >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) rutf8.unichr_as_utf8_append(builder, uchar) return W_UnicodeObject(builder.build(), self._length, flag) @@ -833,14 +833,14 @@ ch = unicodedb.toupper(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) rutf8.unichr_as_utf8_append(builder, ch) if ch >= 0x80: - flag = unicodehelper.combine_flags(flag, rutf8.FLAG_REGULAR) + flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) @@ -926,7 +926,7 @@ except OverflowError: raise oefmt(space.w_OverflowError, "replace string is too long") - flag = unicodehelper.combine_flags(self._get_flag(), w_by._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_by._get_flag()) newlength = self._length + replacements * (w_by._length - w_sub._length) return W_UnicodeObject(res, newlength, flag) @@ -1048,7 +1048,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "rjust() argument 2 must be a single character") - flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - lgt if d > 0: if len(w_fillchar._utf8) == 1: @@ -1067,7 +1067,7 @@ if w_fillchar._len() != 1: raise oefmt(space.w_TypeError, "ljust() argument 2 must be a single character") - flag = unicodehelper.combine_flags(self._get_flag(), w_fillchar._get_flag()) + flag = rutf8.combine_flags(self._get_flag(), w_fillchar._get_flag()) d = width - self._len() if d > 0: if len(w_fillchar._utf8) == 1: diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -50,6 +50,7 @@ def unichr_as_utf8_append(builder, code, allow_surrogates=False): """Encode code (numeric value) as utf8 encoded string and emit the result into the given StringBuilder. + Raises ValueError if the code is outside range(0x110000). """ code = r_uint(code) if code <= r_uint(0x7F): @@ -124,13 +125,6 @@ continuation_bytes += 1 return len(s) - continuation_bytes -def get_flag_from_code(oc): - if oc <= 0x7F: - return FLAG_ASCII - if 0xD800 <= oc <= 0xDFFF: - return FLAG_HAS_SURROGATES - return FLAG_REGULAR - def codepoint_at_pos(code, pos): """ Give a codepoint in code at pos - assumes valid utf8, no checking! 
""" @@ -453,22 +447,24 @@ UTF8_INDEX_STORAGE = lltype.GcStruct('utf8_loc', ('flag', lltype.Signed), - ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct( - 'utf8_loc_elem', - ('baseindex', lltype.Signed), - ('ofs', lltype.FixedSizeArray(lltype.Char, 16))) - )))) + ('contents', lltype.Ptr(lltype.GcArray(lltype.Struct('utf8_loc_elem', + ('baseindex', lltype.Signed), + ('ofs', lltype.FixedSizeArray(lltype.Char, 16)), + ))))) -def unichr_to_flag(ch): - if ch <= 0x7F: +def get_flag_from_code(oc): + if oc <= 0x7F: return FLAG_ASCII - elif 0xD800 <= ch <= 0xDFFF: + if 0xD800 <= oc <= 0xDFFF: return FLAG_HAS_SURROGATES return FLAG_REGULAR -FLAG_REGULAR = 0 -FLAG_HAS_SURROGATES = 1 -FLAG_ASCII = 2 +def combine_flags(one, two): + return one | two + +FLAG_ASCII = 0 # no bits +FLAG_REGULAR = 1 # bit 0 +FLAG_HAS_SURROGATES = 3 # bit 0 and bit 1 # note that we never need index storage if we're pure ascii, but it's useful # for passing into W_UnicodeObject.__init__ From pypy.commits at gmail.com Thu Nov 23 04:27:51 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:27:51 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge heads Message-ID: <5a169497.cb921c0a.f57f4.8bf0@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93134:25ac6121d03c Date: 2017-11-23 10:26 +0100 http://bitbucket.org/pypy/pypy/changeset/25ac6121d03c/ Log: merge heads diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -272,7 +272,7 @@ self._typed_unwrap_error(space, "unicode") def convert_to_w_unicode(self, space): - self._typed_unwrap_error(space, "unicode") + self._typed_unwrap_error(space, "unicode") def bytearray_list_of_chars_w(self, space): self._typed_unwrap_error(space, "bytearray") @@ -1759,6 +1759,11 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + + def unicode_w(self, w_obj): + # XXX: kill me! + return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -97,7 +97,7 @@ output_len -= 1 if output_len == 0: - return space.newutf8("", 1, FLAG_ASCII) + return space.newutf8("", 0, FLAG_ASCII) # Record which newlines are read and do newline translation if # desired, all in one pass. @@ -224,30 +224,28 @@ def _find_line_ending(self, line, start, end): size = end - start if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) + pos = line.find('\n', start, end) if pos >= 0: - return pos - start + 1, 0 + return pos + 1, 0 else: return -1, size elif self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. 
+ while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 @@ -255,7 +253,7 @@ # Non-universal mode. pos = line.find(self.readnl, start, end) if pos >= 0: - return pos - start + len(self.readnl), 0 + return pos + len(self.readnl), 0 else: pos = line.find(self.readnl[0], start, end) if pos >= 0: @@ -520,8 +518,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.utf8_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -580,8 +583,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.utf8_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -617,13 +619,13 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self._get_decoded_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final remaining = size - builder = UnicodeBuilder(size) + builder = StringBuilder(size) # Keep reading chunks until we have n characters to return while True: @@ -643,7 +645,7 @@ continue raise - return space.newunicode(builder.build()) + return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -651,11 +653,10 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = StringBuilder() while True: # First, get some data if necessary @@ -671,7 +672,7 @@ raise if not has_data: # end of file - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -689,8 +690,8 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -708,15 +709,15 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) + # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -725,18 +726,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return 
space.new_from_utf8(result) # _____________________________________________________________ # write methods @@ -759,8 +754,8 @@ if text.find('\n') >= 0: haslf = True if haslf and self.writetranslate and self.writenl: - w_text = space.call_method(w_text, "replace", space.newunicode(u'\n'), - space.newunicode(self.writenl)) + w_text = space.call_method(w_text, "replace", space.new_from_utf8('\n'), + space.new_from_utf8(self.writenl)) text = space.utf8_w(w_text) needflush = False @@ -867,7 +862,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -892,7 +887,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -913,8 +908,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -982,7 +976,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! 
+ assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Thu Nov 23 04:33:47 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:33:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Tests and fixes for 'allow_surrogates=True' in various unicode methods Message-ID: <5a1695fb.7996df0a.4610b.dcfb@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93135:16bfad77e3d5 Date: 2017-11-23 10:33 +0100 http://bitbucket.org/pypy/pypy/changeset/16bfad77e3d5/ Log: Tests and fixes for 'allow_surrogates=True' in various unicode methods diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py --- a/pypy/objspace/std/test/test_unicodeobject.py +++ b/pypy/objspace/std/test/test_unicodeobject.py @@ -299,6 +299,7 @@ assert u"Brown Fox".title() == u"Brown Fox" assert u"bro!wn fox".title() == u"Bro!Wn Fox" assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox" + assert u'\ud800'.title() == u'\ud800' def test_istitle(self): assert u"".istitle() == False @@ -328,10 +329,12 @@ assert u'A'.lower() == u'a' assert u'\u0105'.lower() == u'\u0105' assert u'\u0104'.lower() == u'\u0105' + assert u'\ud800'.lower() == u'\ud800' assert u'a'.upper() == u'A' assert u'A'.upper() == u'A' assert u'\u0105'.upper() == u'\u0104' assert u'\u0104'.upper() == u'\u0104' + assert u'\ud800'.upper() == u'\ud800' def test_capitalize(self): assert u"brown fox".capitalize() == u"Brown fox" @@ -354,6 +357,8 @@ # check with Ll chars with no upper - nothing changes here assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() == u'\u019b\u1d00\u1d86\u0221\u1fb7') + assert u'\ud800'.capitalize() == u'\ud800' + assert u'xx\ud800'.capitalize() == u'Xx\ud800' def test_rjust(self): s = u"abc" @@ -844,6 +849,7 @@ def test_swapcase(self): assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf' + assert u'\ud800'.swapcase() == u'\ud800' def test_buffer(self): buf = buffer(u'XY') diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -357,7 +357,7 @@ ch = unicodedb.toupper(ch) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) return W_UnicodeObject(builder.build(), self._length, flag) def descr_title(self, space): @@ -382,7 +382,7 @@ ch = unicodedb.tolower(ch) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) previous_is_cased = unicodedb.iscased(ch) return builder.build(), flag @@ -541,7 +541,7 @@ lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) if lower >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, lower) # XXX allow surrogates? 
+ rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) pos = rutf8.next_codepoint_pos(self._utf8, pos) return W_UnicodeObject(builder.build(), self._len(), flag) @@ -721,7 +721,7 @@ if uchar >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar) + rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) return W_UnicodeObject(builder.build(), self._length, flag) @unwrap_spec(width=int) @@ -831,14 +831,14 @@ uchar = rutf8.codepoint_at_pos(value, 0) i = rutf8.next_codepoint_pos(value, 0) ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) while i < len(value): uchar = rutf8.codepoint_at_pos(value, i) i = rutf8.next_codepoint_pos(value, i) ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch) + rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) if ch >= 0x80: flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) return W_UnicodeObject(builder.build(), self._len(), flag) From pypy.commits at gmail.com Thu Nov 23 04:48:57 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 01:48:57 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Review for surrogates Message-ID: <5a169989.93131c0a.19af0.57e5@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93136:dc6582a05b85 Date: 2017-11-23 10:48 +0100 http://bitbucket.org/pypy/pypy/changeset/dc6582a05b85/ Log: Review for surrogates diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py --- a/pypy/interpreter/unicodehelper.py +++ b/pypy/interpreter/unicodehelper.py @@ -370,14 +370,15 @@ builder.append(res) else: # when we get here, chr is a 32-bit unicode character - if chr > 0x10ffff: + try: + rutf8.unichr_as_utf8_append(builder, intmask(chr), True) + except ValueError: message = "illegal Unicode character" res, pos = errorhandler(errors, encoding, message, s, pos-2, pos+digits) size, flag = rutf8.check_utf8(res, True) builder.append(res) else: - rutf8.unichr_as_utf8_append(builder, intmask(chr), True) flag = rutf8.get_flag_from_code(intmask(chr)) pos += digits size = 1 @@ -466,7 +467,7 @@ pos += 1 x = (x<<3) + ord(ch) - ord('0') outsize += 1 - if x >= 0x7F: + if x > 0x7F: rutf8.unichr_as_utf8_append(builder, x) flag = combine_flags(rutf8.FLAG_REGULAR, flag) else: @@ -524,7 +525,9 @@ pos = look + 1 outsize += 1 flag = combine_flags(flag, rutf8.get_flag_from_code(code)) - rutf8.unichr_as_utf8_append(builder, code) + rutf8.unichr_as_utf8_append(builder, code, + allow_surrogates=True) + # xxx 'code' is probably always within range here... 
else: res, pos = errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) @@ -772,7 +775,8 @@ surrogate = 0 continue else: - rutf8.unichr_as_utf8_append(result, surrogate) + rutf8.unichr_as_utf8_append(result, surrogate, + allow_surrogates=True) flag = rutf8.FLAG_HAS_SURROGATES outsize += 1 surrogate = 0 @@ -1236,7 +1240,7 @@ result.append(r) continue - rutf8.unichr_as_utf8_append(result, ch) + rutf8.unichr_as_utf8_append(result, ch, allow_surrogates=True) pos += 4 r = result.build() lgt, flag = rutf8.check_utf8(r, True) @@ -1360,7 +1364,7 @@ s, pos, pos + unicode_bytes) result.append(res) continue - rutf8.unichr_as_utf8_append(result, intmask(t)) + rutf8.unichr_as_utf8_append(result, intmask(t), allow_surrogates=True) pos += unicode_bytes r = result.build() lgt, flag = rutf8.check_utf8(r, True) diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -127,7 +127,7 @@ errorcb, namecb, stringdata) src = pypy_cjk_dec_outbuf(decodebuf) length = pypy_cjk_dec_outlen(decodebuf) - return rffi.wcharpsize2utf8(src, length) + return rffi.wcharpsize2utf8(src, length) # assumes no out-of-range chars def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata): diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1012,6 +1012,7 @@ def wcharpsize2utf8(w, size): """ Helper to convert WCHARP pointer to utf8 in one go. Equivalent to wcharpsize2unicode().encode("utf8") + Raises ValueError if characters are outside range(0x110000)! """ from rpython.rlib import rutf8 From pypy.commits at gmail.com Thu Nov 23 09:41:28 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 23 Nov 2017 06:41:28 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Fixes for _cffi_backend Message-ID: <5a16de18.499edf0a.e853.9322@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93137:a94b5860dbb3 Date: 2017-11-23 15:40 +0100 http://bitbucket.org/pypy/pypy/changeset/a94b5860dbb3/ Log: Fixes for _cffi_backend diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -64,13 +64,10 @@ elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_value) - if self.citem.size == 4: + if self.ctitem.size == 2: + length = wchar_helper.utf8_size_as_char16(w_u._utf8) + else: length = w_u._len() - else: - if not w_u._has_surrogates(): - length = w_u._len() - else: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -40,16 +40,13 @@ return ord(s[0]) def cast_unicode(self, w_ob): - import pdb - pdb.set_trace() space = self.space w_u = space.convert_arg_to_w_unicode(w_ob) if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", w_u._len(), self.name) - ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) - return intmask(ordinal) + return rutf8.codepoint_at_pos(w_u._utf8, 0) def cast(self, w_ob): from 
pypy.module._cffi_backend import ctypeptr @@ -175,21 +172,19 @@ return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - if self.is_signed_wchar: - code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) - return self.space.newutf8( - rutf8.unichr_as_utf8(code), 1, - rutf8.get_flag_from_code(code)) - else: - value = misc.read_raw_ulong_data(cdata, self.size) # r_uint - try: - u = wchar_helper.ordinal_to_unicode(value) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, - rutf8.get_flag_from_code(ord(u))) + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + try: + utf8 = rutf8.unichr_as_utf8(value, allow_surrogates=True) + except ValueError: + if self.is_signed_wchar: + s = hex(intmask(value)) + else: + s = hex(value) + raise oefmt(self.space.w_ValueError, + "%s out of range for conversion to unicode: %s", + self.name, s) + flag = rutf8.get_flag_from_code(intmask(value)) + return self.space.newutf8(utf8, 1, flag) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -200,7 +195,13 @@ # returns a r_uint. If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: + raise self._convert_error("single character", w_ob) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) + if self.size == 2 and ordinal > 0xFFFF: + raise self._convert_error("single character <= 0xFFFF", w_ob) + return r_uint(ordinal) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): @@ -214,15 +215,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - u = wchar_helper.unicode_from_char16(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) else: try: - u = wchar_helper.unicode_from_char32(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + "%s out of range for conversion to unicode: %s", + self.name, hex(e.ordinal)) + return self.space.newutf8(utf8, lgt, flag) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -92,28 +92,20 @@ if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) w_u = space.convert_arg_to_w_unicode(w_ob) - if self.size == 4: + s = w_u._utf8 + if self.ctitem.size == 2: + n = wchar_helper.utf8_size_as_char16(s) + else: n = w_u._len() - else: - if not w_u._has_surrogates(): - n = w_u._len() - else: - n = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " "(got %d characters)", self.name, n) add_final_zero = (n != self.length) if self.ctitem.size == 2: - try: - wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "unicode character ouf of range for " - "conversion to 
char16_t: %s", hex(e.ordinal)) + wchar_helper.utf8_to_char16(s, cdata, n, add_final_zero) else: - wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) + wchar_helper.utf8_to_char32(s, cdata, n, add_final_zero) else: raise self._convert_error("list or tuple", w_ob) @@ -334,8 +326,7 @@ from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) + length = wchar_helper.utf8_size_as_char16(w_u._utf8) else: length = w_u._len() length += 1 diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -0,0 +1,10 @@ +from hypothesis import given, strategies +from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 + + + + at given(strategies.text()) +def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/module/_cffi_backend/wchar_helper.py b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -6,41 +6,6 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = 4 - - -if SIZE_UNICODE == 4: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - return unichr(intmask(ordinal)) -else: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - if ordinal <= 0xffff: - return unichr(intmask(ordinal)) - elif ordinal <= 0x10ffff: - ordinal = intmask(ordinal - 0x10000) - return (unichr(0xD800 | (ordinal >> 10)) + - unichr(0xDC00 | (ordinal & 0x3FF))) - else: - raise OutOfRange(ordinal) - -def is_surrogate(u, index): - return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and - unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF)) - -def as_surrogate(u, index): - ordinal = (ord(u[index + 0]) - 0xD800) << 10 - ordinal |= (ord(u[index + 1]) - 0xDC00) - return r_uint(ordinal + 0x10000) - -def unicode_to_ordinal(u): - if len(u) == 1: - u = ord(u[0]) - return r_uint(u) - elif SIZE_UNICODE == 2: - if len(u) == 2 and is_surrogate(u, 0): - return r_uint(as_surrogate(u, 0)) - raise ValueError - class OutOfRange(Exception): ordinal = 0 @@ -49,59 +14,41 @@ ordinal = intmask(rffi.cast(rffi.INT, ordinal)) self.ordinal = ordinal -def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) +def utf8_from_char32(ptr, length): + # 'ptr' is a pointer to 'length' 32-bit integers + ptr = rffi.cast(rffi.UINTP, ptr) + u = StringBuilder(length) + j = 0 + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + try: + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + except ValueError: + raise OutOfRange(ch) + return u.build(), length, flag - -if SIZE_UNICODE == 2: - def unicode_from_char32(ptr, length): - # 'ptr' is a pointer to 'length' 32-bit integers - ptr = rffi.cast(rffi.UINTP, ptr) - alloc = length - for i in range(length): - if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF: - alloc += 1 - - u = [u'\x00'] * alloc - j = 0 - for i in range(length): - ordinal = rffi.cast(lltype.Unsigned, ptr[i]) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise 
OutOfRange(ordinal) - ordinal = intmask(ordinal - 0x10000) - u[j] = unichr(0xD800 | (ordinal >> 10)) +def utf8_from_char16(ptr, length): + # 'ptr' is a pointer to 'length' 16-bit integers + ptr = rffi.cast(rffi.USHORTP, ptr) + u = StringBuilder(length) + j = 0 + result_length = length + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + if 0xD800 <= ch <= 0xDBFF and j < length: + ch2 = intmask(ptr[j]) + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[j] = unichr(0xDC00 | (ordinal & 0x3FF)) - j += 1 - else: - u[j] = unichr(intmask(ordinal)) - j += 1 - assert j == len(u) - return u''.join(u) - - unicode_from_char16 = _unicode_from_wchar - -else: - unicode_from_char32 = _unicode_from_wchar - - def unicode_from_char16(ptr, length): - # 'ptr' is a pointer to 'length' 16-bit integers - ptr = rffi.cast(rffi.USHORTP, ptr) - u = StringBuilder(length) - i = 0 - j = 0 - while j < length: - ch = intmask(ptr[j]) - j += 1 - if 0xD800 <= ch <= 0xDBFF and j < length: - ch2 = intmask(ptr[j]) - if 0xDC00 <= ch2 <= 0xDFFF: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 - j += 1 - rutf8.unichr_as_utf8_append(u, ch) - i += 1 - return u.build() + result_length -= 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + return u.build(), result_length, flag @specialize.ll() @@ -122,65 +69,44 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u, len): - result = len - i = 0 - while i < len(u): - code = rutf8.codepoint_at_pos(u, i) - if code > 0xFFFF: - result += 1 - i = rutf8.next_codepoint_pos(u, i) +def utf8_size_as_char16(u): + # Counts one per unichar in 'u', or two if they are greater than 0xffff. + TABLE = "\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x02" + result = 0 + for c in u: + result += ord(TABLE[ord(c) >> 4]) return result -def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' wchars; - # we assume here that target_length == len(u). - unichardata = rffi.cast(rffi.CWCHARP, target_ptr) - copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length) +def utf8_to_char32(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; + # we assume (and check) that target_length == number of unichars in utf8. + unichardata = rffi.cast(rffi.UINTP, target_ptr) + i = 0 + for j in range(target_length): + code = rutf8.codepoint_at_pos(utf8, i) + unichardata[j] = rffi.cast(rffi.UINT, code) + i = rutf8.next_codepoint_pos(utf8, i) + assert i == len(utf8) if add_final_zero: - unichardata[target_length] = u'\x00' + unichardata[target_length] = rffi.cast(rffi.UINT, 0) - -if SIZE_UNICODE == 2: - def unicode_to_char32(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; - # we assume here that target_length == unicode_size_as_char32(u). 
- ptr = rffi.cast(rffi.UINTP, target_ptr) - src_index = 0 - last_surrogate_pos = len(u) - 2 - for i in range(target_length): - if src_index <= last_surrogate_pos and is_surrogate(u, src_index): - ordinal = as_surrogate(u, src_index) - src_index += 2 - else: - ordinal = r_uint(ord(u[src_index])) - src_index += 1 - ptr[i] = rffi.cast(rffi.UINT, ordinal) - if add_final_zero: - ptr[target_length] = rffi.cast(rffi.UINT, 0) - - unicode_to_char16 = _unicode_to_wchar - -else: - unicode_to_char32 = _unicode_to_wchar - - def unicode_to_char16(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; - # we assume here that target_length == unicode_size_as_char16(u). - ptr = rffi.cast(rffi.USHORTP, target_ptr) - for uc in u: - ordinal = ord(uc) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal -= 0x10000 - ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) - ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) - ptr = rffi.ptradd(ptr, 2) - else: - ptr[0] = rffi.cast(rffi.USHORT, ordinal) - ptr = rffi.ptradd(ptr, 1) - assert ptr == ( - rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) - if add_final_zero: - ptr[0] = rffi.cast(rffi.USHORT, 0) +def utf8_to_char16(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; + # we assume (and check) that target_length == utf8_size_as_char16(utf8). + ptr = rffi.cast(rffi.USHORTP, target_ptr) + i = 0 + while i < len(utf8): + ordinal = rutf8.codepoint_at_pos(utf8, i) + if ordinal > 0xFFFF: + ordinal -= 0x10000 + ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) + ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) + ptr = rffi.ptradd(ptr, 2) + else: + ptr[0] = rffi.cast(rffi.USHORT, ordinal) + ptr = rffi.ptradd(ptr, 1) + i = rutf8.next_codepoint_pos(utf8, i) + assert ptr == ( + rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) + if add_final_zero: + ptr[0] = rffi.cast(rffi.USHORT, 0) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -453,6 +453,7 @@ ))))) def get_flag_from_code(oc): + assert isinstance(oc, int) if oc <= 0x7F: return FLAG_ASCII if 0xD800 <= oc <= 0xDFFF: From pypy.commits at gmail.com Thu Nov 23 09:50:35 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:50:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Utf8StringBuilder Message-ID: <5a16e03b.cb3a1c0a.79405.30ff@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93138:9ede67aee27e Date: 2017-11-23 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/9ede67aee27e/ Log: Utf8StringBuilder diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -16,9 +16,11 @@ """ import sys -from rpython.rlib.objectmodel import enforceargs, we_are_translated +from rpython.rlib.objectmodel import enforceargs, we_are_translated, specialize from rpython.rlib.rstring import StringBuilder from rpython.rlib import jit +from rpython.rlib.signature import signature +from rpython.rlib.types import char, none from rpython.rlib.rarithmetic import r_uint from rpython.rlib.unicodedata import unicodedb from rpython.rtyper.lltypesystem import lltype, rffi @@ -316,6 +318,11 @@ return res, flag raise CheckError(~res) +def get_utf8_length_flag(s): + """ Get the length and flag out of valid utf8. 
For now just calls check_utf8 + """ + return check_utf8(s, True) + @jit.elidable def _check_utf8(s, allow_surrogates, start, stop): pos = start @@ -655,6 +662,53 @@ return unicode_escape #, char_escape_helper +class Utf8StringBuilder(object): + def __init__(self, size=0): + self._s = StringBuilder(size) + self._lgt = 0 + self._flag = FLAG_ASCII + + def append(self, s): + # for strings + self._s.append(s) + newlgt, newflag = get_utf8_length_flag(s) + self._lgt += newlgt + self._flag = combine_flags(self._flag, newflag) + + @signature(char(), returns=none()) + def append_char(self, s): + # for characters, ascii + self._lgt += 1 + self._s.append(s) + + def append_code(self, code): + self._flag = combine_flags(self._flag, get_flag_from_code(code)) + self._lgt += 1 + unichr_as_utf8_append(self._s, code, True) + + def build(self): + return self._s.build() + + def get_flag(self): + return self._flag + + def get_length(self): + return self._lgt + +class Utf8StringIterator(object): + def __init__(self, utf8s): + self._utf8 = utf8s + self._end = len(utf8s) + self._pos = 0 + + def done(self): + return self._pos == self._end + + def next(self): + ret = codepoint_at_pos(self._utf8, self._pos) + self._pos = next_codepoint_pos(self._utf8, self._pos) + return ret + def decode_latin_1(s): if len(s) == 0: return s diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -139,3 +139,39 @@ result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected + + at given(strategies.text()) +def test_get_utf8_length_flag(u): + exp_lgt = len(u) + exp_flag = rutf8.FLAG_ASCII + for c in u: + if ord(c) > 0x7F: + exp_flag = rutf8.FLAG_REGULAR + lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) + assert lgt == exp_lgt + assert flag == exp_flag + +def test_utf8_string_builder(): + s = rutf8.Utf8StringBuilder() + s.append("foo") + s.append_char("x") + assert s.get_flag() == rutf8.FLAG_ASCII + assert s.get_length() == 4 + assert s.build() == "foox" + s.append(u"\u1234".encode("utf8")) + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 5 + assert s.build().decode("utf8") == u"foox\u1234" + s.append("foo") + s.append_char("x") + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 9 + assert s.build().decode("utf8") == u"foox\u1234foox" + s = rutf8.Utf8StringBuilder() + s.append_code(0x1234) + assert s.build().decode("utf8") == u"\u1234" + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 1 + s.append_code(0xD800) + assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES + assert s.get_length() == 2 From pypy.commits at gmail.com Thu Nov 23 09:50:37 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:50:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a16e03d.c99edf0a.84d53.98d9@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93139:3e45feebc910 Date: 2017-11-23 15:49 +0100 http://bitbucket.org/pypy/pypy/changeset/3e45feebc910/ Log: merge diff --git a/pypy/module/_cffi_backend/ctypearray.py b/pypy/module/_cffi_backend/ctypearray.py --- a/pypy/module/_cffi_backend/ctypearray.py +++ b/pypy/module/_cffi_backend/ctypearray.py @@ -64,13 +64,10 @@ elif space.isinstance_w(w_value, space.w_unicode): from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_value) - if self.citem.size == 4: + if self.ctitem.size == 2: + 
length = wchar_helper.utf8_size_as_char16(w_u._utf8) + else: length = w_u._len() - else: - if not w_u._has_surrogates(): - length = w_u._len() - else: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, w_u._len()) return (w_value, length + 1) else: explicitlength = space.getindex_w(w_value, space.w_OverflowError) diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py --- a/pypy/module/_cffi_backend/ctypeprim.py +++ b/pypy/module/_cffi_backend/ctypeprim.py @@ -40,16 +40,13 @@ return ord(s[0]) def cast_unicode(self, w_ob): - import pdb - pdb.set_trace() space = self.space w_u = space.convert_arg_to_w_unicode(w_ob) if w_u._len() != 1: raise oefmt(space.w_TypeError, "cannot cast unicode string of length %d to ctype '%s'", w_u._len(), self.name) - ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) - return intmask(ordinal) + return rutf8.codepoint_at_pos(w_u._utf8, 0) def cast(self, w_ob): from pypy.module._cffi_backend import ctypeptr @@ -175,21 +172,19 @@ return self.space.newint(value) # r_uint => 'long' object def convert_to_object(self, cdata): - if self.is_signed_wchar: - code = ord(rffi.cast(rffi.CWCHARP, cdata)[0]) - return self.space.newutf8( - rutf8.unichr_as_utf8(code), 1, - rutf8.get_flag_from_code(code)) - else: - value = misc.read_raw_ulong_data(cdata, self.size) # r_uint - try: - u = wchar_helper.ordinal_to_unicode(value) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newutf8(rutf8.unichr_as_utf8(ord(u)), 1, - rutf8.get_flag_from_code(ord(u))) + value = misc.read_raw_ulong_data(cdata, self.size) # r_uint + try: + utf8 = rutf8.unichr_as_utf8(value, allow_surrogates=True) + except ValueError: + if self.is_signed_wchar: + s = hex(intmask(value)) + else: + s = hex(value) + raise oefmt(self.space.w_ValueError, + "%s out of range for conversion to unicode: %s", + self.name, s) + flag = rutf8.get_flag_from_code(intmask(value)) + return self.space.newutf8(utf8, 1, flag) def string(self, cdataobj, maxlen): with cdataobj as ptr: @@ -200,7 +195,13 @@ # returns a r_uint. 
If self.size == 2, it is smaller than 0x10000 space = self.space if space.isinstance_w(w_ob, space.w_unicode): - return rutf8.codepoint_at_pos(space.utf8_w(w_ob), 0) + w_u = space.convert_arg_to_w_unicode(w_ob) + if w_u._len() != 1: + raise self._convert_error("single character", w_ob) + ordinal = rutf8.codepoint_at_pos(w_u._utf8, 0) + if self.size == 2 and ordinal > 0xFFFF: + raise self._convert_error("single character <= 0xFFFF", w_ob) + return r_uint(ordinal) elif (isinstance(w_ob, cdataobj.W_CData) and isinstance(w_ob.ctype, W_CTypePrimitiveUniChar) and w_ob.ctype.size == self.size): @@ -214,15 +215,15 @@ def unpack_ptr(self, w_ctypeptr, ptr, length): if self.size == 2: - u = wchar_helper.unicode_from_char16(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char16(ptr, length) else: try: - u = wchar_helper.unicode_from_char32(ptr, length) + utf8, lgt, flag = wchar_helper.utf8_from_char32(ptr, length) except wchar_helper.OutOfRange as e: raise oefmt(self.space.w_ValueError, - "char32_t out of range for " - "conversion to unicode: %s", hex(e.ordinal)) - return self.space.newunicode(u) + "%s out of range for conversion to unicode: %s", + self.name, hex(e.ordinal)) + return self.space.newutf8(utf8, lgt, flag) class W_CTypePrimitiveSigned(W_CTypePrimitive): diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -92,28 +92,20 @@ if not space.isinstance_w(w_ob, space.w_unicode): raise self._convert_error("unicode or list or tuple", w_ob) w_u = space.convert_arg_to_w_unicode(w_ob) - if self.size == 4: + s = w_u._utf8 + if self.ctitem.size == 2: + n = wchar_helper.utf8_size_as_char16(s) + else: n = w_u._len() - else: - if not w_u._has_surrogates(): - n = w_u._len() - else: - n = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) if self.length >= 0 and n > self.length: raise oefmt(space.w_IndexError, "initializer unicode string is too long for '%s' " "(got %d characters)", self.name, n) add_final_zero = (n != self.length) if self.ctitem.size == 2: - try: - wchar_helper.unicode_to_char16(s, cdata, n, add_final_zero) - except wchar_helper.OutOfRange as e: - raise oefmt(self.space.w_ValueError, - "unicode character ouf of range for " - "conversion to char16_t: %s", hex(e.ordinal)) + wchar_helper.utf8_to_char16(s, cdata, n, add_final_zero) else: - wchar_helper.unicode_to_char32(s, cdata, n, add_final_zero) + wchar_helper.utf8_to_char32(s, cdata, n, add_final_zero) else: raise self._convert_error("list or tuple", w_ob) @@ -334,8 +326,7 @@ from pypy.module._cffi_backend import wchar_helper w_u = space.convert_arg_to_w_unicode(w_init) if self.ctitem.size == 2: - length = wchar_helper.unicode_size_as_char16(w_u._utf8, - w_u._len()) + length = wchar_helper.utf8_size_as_char16(w_u._utf8) else: length = w_u._len() length += 1 diff --git a/pypy/module/_cffi_backend/test/test_wchar_helper.py b/pypy/module/_cffi_backend/test/test_wchar_helper.py new file mode 100644 --- /dev/null +++ b/pypy/module/_cffi_backend/test/test_wchar_helper.py @@ -0,0 +1,10 @@ +from hypothesis import given, strategies +from pypy.module._cffi_backend.wchar_helper import utf8_size_as_char16 + + + + at given(strategies.text()) +def test_utf8_size_as_char16(u): + assert type(u) is unicode + length = utf8_size_as_char16(''.join(uc.encode('utf8') for uc in u)) + assert length == sum((1 if uc <= u'\uFFFF' else 2) for uc in u) diff --git a/pypy/module/_cffi_backend/wchar_helper.py 
b/pypy/module/_cffi_backend/wchar_helper.py --- a/pypy/module/_cffi_backend/wchar_helper.py +++ b/pypy/module/_cffi_backend/wchar_helper.py @@ -6,41 +6,6 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.lltypesystem.rstr import copy_unicode_to_raw -SIZE_UNICODE = 4 - - -if SIZE_UNICODE == 4: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - return unichr(intmask(ordinal)) -else: - def ordinal_to_unicode(ordinal): # 'ordinal' is a r_uint - if ordinal <= 0xffff: - return unichr(intmask(ordinal)) - elif ordinal <= 0x10ffff: - ordinal = intmask(ordinal - 0x10000) - return (unichr(0xD800 | (ordinal >> 10)) + - unichr(0xDC00 | (ordinal & 0x3FF))) - else: - raise OutOfRange(ordinal) - -def is_surrogate(u, index): - return (unichr(0xD800) <= u[index + 0] <= unichr(0xDBFF) and - unichr(0xDC00) <= u[index + 1] <= unichr(0xDFFF)) - -def as_surrogate(u, index): - ordinal = (ord(u[index + 0]) - 0xD800) << 10 - ordinal |= (ord(u[index + 1]) - 0xDC00) - return r_uint(ordinal + 0x10000) - -def unicode_to_ordinal(u): - if len(u) == 1: - u = ord(u[0]) - return r_uint(u) - elif SIZE_UNICODE == 2: - if len(u) == 2 and is_surrogate(u, 0): - return r_uint(as_surrogate(u, 0)) - raise ValueError - class OutOfRange(Exception): ordinal = 0 @@ -49,59 +14,41 @@ ordinal = intmask(rffi.cast(rffi.INT, ordinal)) self.ordinal = ordinal -def _unicode_from_wchar(ptr, length): - return rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, ptr), length) +def utf8_from_char32(ptr, length): + # 'ptr' is a pointer to 'length' 32-bit integers + ptr = rffi.cast(rffi.UINTP, ptr) + u = StringBuilder(length) + j = 0 + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + try: + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + except ValueError: + raise OutOfRange(ch) + return u.build(), length, flag - -if SIZE_UNICODE == 2: - def unicode_from_char32(ptr, length): - # 'ptr' is a pointer to 'length' 32-bit integers - ptr = rffi.cast(rffi.UINTP, ptr) - alloc = length - for i in range(length): - if rffi.cast(lltype.Unsigned, ptr[i]) > 0xFFFF: - alloc += 1 - - u = [u'\x00'] * alloc - j = 0 - for i in range(length): - ordinal = rffi.cast(lltype.Unsigned, ptr[i]) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal = intmask(ordinal - 0x10000) - u[j] = unichr(0xD800 | (ordinal >> 10)) +def utf8_from_char16(ptr, length): + # 'ptr' is a pointer to 'length' 16-bit integers + ptr = rffi.cast(rffi.USHORTP, ptr) + u = StringBuilder(length) + j = 0 + result_length = length + flag = rutf8.FLAG_ASCII + while j < length: + ch = intmask(ptr[j]) + j += 1 + if 0xD800 <= ch <= 0xDBFF and j < length: + ch2 = intmask(ptr[j]) + if 0xDC00 <= ch2 <= 0xDFFF: + ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 j += 1 - u[j] = unichr(0xDC00 | (ordinal & 0x3FF)) - j += 1 - else: - u[j] = unichr(intmask(ordinal)) - j += 1 - assert j == len(u) - return u''.join(u) - - unicode_from_char16 = _unicode_from_wchar - -else: - unicode_from_char32 = _unicode_from_wchar - - def unicode_from_char16(ptr, length): - # 'ptr' is a pointer to 'length' 16-bit integers - ptr = rffi.cast(rffi.USHORTP, ptr) - u = StringBuilder(length) - i = 0 - j = 0 - while j < length: - ch = intmask(ptr[j]) - j += 1 - if 0xD800 <= ch <= 0xDBFF and j < length: - ch2 = intmask(ptr[j]) - if 0xDC00 <= ch2 <= 0xDFFF: - ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000 - j += 1 - rutf8.unichr_as_utf8_append(u, ch) - i += 1 - return u.build() 
+ result_length -= 1 + flag = rutf8.combine_flags(flag, rutf8.get_flag_from_code(ch)) + rutf8.unichr_as_utf8_append(u, ch, allow_surrogates=True) + return u.build(), result_length, flag @specialize.ll() @@ -122,65 +69,44 @@ return _measure_length(rffi.cast(rffi.UINTP, ptr), maxlen) -def unicode_size_as_char16(u, len): - result = len - i = 0 - while i < len(u): - code = rutf8.codepoint_at_pos(u, i) - if code > 0xFFFF: - result += 1 - i = rutf8.next_codepoint_pos(u, i) +def utf8_size_as_char16(u): + # Counts one per unichar in 'u', or two if they are greater than 0xffff. + TABLE = "\x01\x01\x01\x01\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x02" + result = 0 + for c in u: + result += ord(TABLE[ord(c) >> 4]) return result -def _unicode_to_wchar(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' wchars; - # we assume here that target_length == len(u). - unichardata = rffi.cast(rffi.CWCHARP, target_ptr) - copy_unicode_to_raw(llunicode(u), unichardata, 0, target_length) +def utf8_to_char32(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; + # we assume (and check) that target_length == number of unichars in utf8. + unichardata = rffi.cast(rffi.UINTP, target_ptr) + i = 0 + for j in range(target_length): + code = rutf8.codepoint_at_pos(utf8, i) + unichardata[j] = rffi.cast(rffi.UINT, code) + i = rutf8.next_codepoint_pos(utf8, i) + assert i == len(utf8) if add_final_zero: - unichardata[target_length] = u'\x00' + unichardata[target_length] = rffi.cast(rffi.UINT, 0) - -if SIZE_UNICODE == 2: - def unicode_to_char32(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 32-bit integers; - # we assume here that target_length == unicode_size_as_char32(u). - ptr = rffi.cast(rffi.UINTP, target_ptr) - src_index = 0 - last_surrogate_pos = len(u) - 2 - for i in range(target_length): - if src_index <= last_surrogate_pos and is_surrogate(u, src_index): - ordinal = as_surrogate(u, src_index) - src_index += 2 - else: - ordinal = r_uint(ord(u[src_index])) - src_index += 1 - ptr[i] = rffi.cast(rffi.UINT, ordinal) - if add_final_zero: - ptr[target_length] = rffi.cast(rffi.UINT, 0) - - unicode_to_char16 = _unicode_to_wchar - -else: - unicode_to_char32 = _unicode_to_wchar - - def unicode_to_char16(u, target_ptr, target_length, add_final_zero): - # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; - # we assume here that target_length == unicode_size_as_char16(u). - ptr = rffi.cast(rffi.USHORTP, target_ptr) - for uc in u: - ordinal = ord(uc) - if ordinal > 0xFFFF: - if ordinal > 0x10FFFF: - raise OutOfRange(ordinal) - ordinal -= 0x10000 - ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) - ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) - ptr = rffi.ptradd(ptr, 2) - else: - ptr[0] = rffi.cast(rffi.USHORT, ordinal) - ptr = rffi.ptradd(ptr, 1) - assert ptr == ( - rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) - if add_final_zero: - ptr[0] = rffi.cast(rffi.USHORT, 0) +def utf8_to_char16(utf8, target_ptr, target_length, add_final_zero): + # 'target_ptr' is a raw pointer to 'target_length' 16-bit integers; + # we assume (and check) that target_length == utf8_size_as_char16(utf8). 
+ ptr = rffi.cast(rffi.USHORTP, target_ptr) + i = 0 + while i < len(utf8): + ordinal = rutf8.codepoint_at_pos(utf8, i) + if ordinal > 0xFFFF: + ordinal -= 0x10000 + ptr[0] = rffi.cast(rffi.USHORT, 0xD800 | (ordinal >> 10)) + ptr[1] = rffi.cast(rffi.USHORT, 0xDC00 | (ordinal & 0x3FF)) + ptr = rffi.ptradd(ptr, 2) + else: + ptr[0] = rffi.cast(rffi.USHORT, ordinal) + ptr = rffi.ptradd(ptr, 1) + i = rutf8.next_codepoint_pos(utf8, i) + assert ptr == ( + rffi.ptradd(rffi.cast(rffi.USHORTP, target_ptr), target_length)) + if add_final_zero: + ptr[0] = rffi.cast(rffi.USHORT, 0) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -460,6 +460,7 @@ ))))) def get_flag_from_code(oc): + assert isinstance(oc, int) if oc <= 0x7F: return FLAG_ASCII if 0xD800 <= oc <= 0xDFFF: From pypy.commits at gmail.com Thu Nov 23 09:57:40 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 06:57:40 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: provide explicit examples Message-ID: <5a16e1e4.a8a0df0a.fd2c9.6e63@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93140:d24fe4f59c96 Date: 2017-11-23 15:57 +0100 http://bitbucket.org/pypy/pypy/changeset/d24fe4f59c96/ Log: provide explicit examples diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -30,6 +30,7 @@ @settings(max_examples=10000) @given(strategies.binary(), strategies.booleans()) + at example('\xf1\x80\x80\x80', False) def test_check_utf8(s, allow_surrogates): _test_check_utf8(s, allow_surrogates) @@ -134,19 +135,23 @@ assert repr(u) == repr_func(u.encode('utf8')) @given(strategies.lists(strategies.characters())) + at example([u'\ud800', u'\udc00']) def test_surrogate_in_utf8(unichars): uni = u''.join(unichars).encode('utf-8') result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected - at given(strategies.text()) -def test_get_utf8_length_flag(u): + at given(strategies.lists(strategies.characters())) +def test_get_utf8_length_flag(unichars): + u = u''.join(unichars) exp_lgt = len(u) exp_flag = rutf8.FLAG_ASCII for c in u: if ord(c) > 0x7F: exp_flag = rutf8.FLAG_REGULAR + if 0xD800 <= ord(c) <= 0xDFFF: + exp_flag = rutf8.FLAG_HAS_SURROGATES lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) assert lgt == exp_lgt assert flag == exp_flag From pypy.commits at gmail.com Thu Nov 23 10:15:50 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:15:50 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix test on narrow host Message-ID: <5a16e626.54d91c0a.8efdd.759f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93141:eb564d44a7c8 Date: 2017-11-23 16:15 +0100 http://bitbucket.org/pypy/pypy/changeset/eb564d44a7c8/ Log: fix test on narrow host diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -57,12 +57,13 @@ assert ~(length) == e.start else: assert valid - assert length == len(u) if flag == rutf8.FLAG_ASCII: s.decode('ascii') # assert did not raise elif flag == rutf8.FLAG_HAS_SURROGATES: assert allow_surrogates assert _has_surrogates(s) + if sys.maxunicode == 0x10FFFF or not _has_surrogates(s): + assert length == len(u) @given(strategies.characters()) def test_next_pos(uni): From pypy.commits at gmail.com Thu Nov 23 10:18:19 2017 From: 
pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:18:19 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix tests on narrow host Message-ID: <5a16e6bb.9085df0a.341f4.2ea2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93142:fa3bcbe5b09f Date: 2017-11-23 16:17 +0100 http://bitbucket.org/pypy/pypy/changeset/fa3bcbe5b09f/ Log: fix tests on narrow host diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -138,7 +138,7 @@ @given(strategies.lists(strategies.characters())) @example([u'\ud800', u'\udc00']) def test_surrogate_in_utf8(unichars): - uni = u''.join(unichars).encode('utf-8') + uni = ''.join([u.encode('utf8') for u in unichars]) result = rutf8.surrogate_in_utf8(uni) expected = any(uch for uch in unichars if u'\ud800' <= uch <= u'\udfff') assert result == expected @@ -153,6 +153,7 @@ exp_flag = rutf8.FLAG_REGULAR if 0xD800 <= ord(c) <= 0xDFFF: exp_flag = rutf8.FLAG_HAS_SURROGATES + break lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) assert lgt == exp_lgt assert flag == exp_flag From pypy.commits at gmail.com Thu Nov 23 10:32:44 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:32:44 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: more tests Message-ID: <5a16ea1c.1cbf1c0a.deee6.077e@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93143:e4a568e4514c Date: 2017-11-23 16:32 +0100 http://bitbucket.org/pypy/pypy/changeset/e4a568e4514c/ Log: more tests diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -154,8 +154,9 @@ if 0xD800 <= ord(c) <= 0xDFFF: exp_flag = rutf8.FLAG_HAS_SURROGATES break - lgt, flag = rutf8.get_utf8_length_flag(u.encode('utf8')) - assert lgt == exp_lgt + lgt, flag = rutf8.get_utf8_length_flag(''.join([c.encode('utf8') for c in u])) + if exp_flag != rutf8.FLAG_HAS_SURROGATES: + assert lgt == exp_lgt assert flag == exp_flag def test_utf8_string_builder(): @@ -182,3 +183,11 @@ s.append_code(0xD800) assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 2 + + at given(strategies.text()) +def test_utf8_iterator(arg): + u = rutf8.Utf8StringIterator(arg.encode('utf8')) + l = [] + while not u.done(): + l.append(unichr(u.next())) + assert list(arg) == l From pypy.commits at gmail.com Thu Nov 23 10:46:47 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 07:46:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge default Message-ID: <5a16ed67.cb3a1c0a.79405.4789@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93144:177352fb8cf4 Date: 2017-11-23 16:46 +0100 http://bitbucket.org/pypy/pypy/changeset/177352fb8cf4/ Log: merge default diff too long, truncating to 2000 out of 7577 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = 
st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + 
suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator 
import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = 
property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except 
ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', '_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1027,21 +1027,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. 
The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. _`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -10,3 +10,19 @@ .. branch: docs-osx-brew-openssl +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 
'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. 
+ """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -7,7 +7,7 @@ interpleveldefs = { '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', @@ -15,7 +15,9 @@ '_get_nullptr' : 'interp_cppyy.get_nullptr', 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { diff --git a/pypy/module/_cppyy/capi/loadable_capi.py b/pypy/module/_cppyy/capi/loadable_capi.py --- a/pypy/module/_cppyy/capi/loadable_capi.py +++ b/pypy/module/_cppyy/capi/loadable_capi.py @@ -217,7 +217,8 @@ 'method_req_args' : ([c_scope, c_index], c_int), 'method_arg_type' : ([c_scope, c_index, c_int], c_ccharp), 'method_arg_default' : ([c_scope, c_index, c_int], c_ccharp), - 'method_signature' : ([c_scope, c_index], c_ccharp), + 'method_signature' : ([c_scope, c_index, c_int], c_ccharp), + 'method_prototype' : ([c_scope, c_index, c_int], c_ccharp), 'method_is_template' : ([c_scope, c_index], c_int), 'method_num_template_args' : ([c_scope, c_index], c_int), @@ -498,9 +499,12 @@ def c_method_arg_default(space, cppscope, index, arg_index): args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(arg_index)] return charp2str_free(space, call_capi(space, 'method_arg_default', args)) -def c_method_signature(space, cppscope, index): - args = [_ArgH(cppscope.handle), _ArgL(index)] +def c_method_signature(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] return charp2str_free(space, call_capi(space, 'method_signature', args)) +def c_method_prototype(space, cppscope, index, show_formalargs=True): + args = [_ArgH(cppscope.handle), _ArgL(index), _ArgL(show_formalargs)] + return charp2str_free(space, call_capi(space, 'method_prototype', args)) def c_method_is_template(space, cppscope, index): args = [_ArgH(cppscope.handle), _ArgL(index)] diff --git a/pypy/module/_cppyy/converter.py b/pypy/module/_cppyy/converter.py --- a/pypy/module/_cppyy/converter.py +++ b/pypy/module/_cppyy/converter.py @@ -4,7 +4,7 @@ from rpython.rtyper.lltypesystem import rffi, lltype from rpython.rlib.rarithmetic import r_singlefloat, r_longfloat -from rpython.rlib import rfloat +from rpython.rlib import rfloat, rawrefcount from pypy.module._rawffi.interp_rawffi import letter2tp from pypy.module._rawffi.array import W_Array, W_ArrayInstance @@ -21,9 +21,9 @@ # match for the qualified type. 
-def get_rawobject(space, w_obj): +def get_rawobject(space, w_obj, can_be_None=True): from pypy.module._cppyy.interp_cppyy import W_CPPClass - cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=True) + cppinstance = space.interp_w(W_CPPClass, w_obj, can_be_None=can_be_None) if cppinstance: rawobject = cppinstance.get_rawobject() assert lltype.typeOf(rawobject) == capi.C_OBJECT @@ -48,17 +48,16 @@ return capi.C_NULL_OBJECT def is_nullpointer_specialcase(space, w_obj): - # 0, None, and nullptr may serve as "NULL", check for any of them + # 0 and nullptr may serve as "NULL" # integer 0 try: return space.int_w(w_obj) == 0 except Exception: pass - # None or nullptr + # C++-style nullptr from pypy.module._cppyy import interp_cppyy - return space.is_true(space.is_(w_obj, space.w_None)) or \ - space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) + return space.is_true(space.is_(w_obj, interp_cppyy.get_nullptr(space))) def get_rawbuffer(space, w_obj): # raw buffer @@ -74,7 +73,7 @@ return rffi.cast(rffi.VOIDP, space.uint_w(arr.getbuffer(space))) except Exception: pass - # pre-defined NULL + # pre-defined nullptr if is_nullpointer_specialcase(space, w_obj): return rffi.cast(rffi.VOIDP, 0) raise TypeError("not an addressable buffer") @@ -392,6 +391,7 @@ _immutable_fields_ = ['typecode'] typecode = 'g' + class CStringConverter(TypeConverter): def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.LONGP, address) @@ -408,18 +408,27 @@ def free_argument(self, space, arg, call_local): lltype.free(rffi.cast(rffi.CCHARPP, arg)[0], flavor='raw') +class CStringConverterWithSize(CStringConverter): + _immutable_fields_ = ['size'] + + def __init__(self, space, extra): + self.size = extra + + def from_memory(self, space, w_obj, w_pycppclass, offset): + address = self._get_raw_address(space, w_obj, offset) + charpptr = rffi.cast(rffi.CCHARP, address) + strsize = self.size + if charpptr[self.size-1] == '\0': + strsize = self.size-1 # rffi will add \0 back + return space.newbytes(rffi.charpsize2str(charpptr, strsize)) + class VoidPtrConverter(TypeConverter): def _unwrap_object(self, space, w_obj): try: obj = get_rawbuffer(space, w_obj) except TypeError: - try: - # TODO: accept a 'capsule' rather than naked int - # (do accept int(0), though) - obj = rffi.cast(rffi.VOIDP, space.uint_w(w_obj)) - except Exception: - obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) + obj = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj, False)) return obj def cffi_type(self, space): @@ -463,12 +472,12 @@ def convert_argument(self, space, w_obj, address, call_local): x = rffi.cast(rffi.VOIDPP, address) ba = rffi.cast(rffi.CCHARP, address) - r = rffi.cast(rffi.VOIDPP, call_local) try: - r[0] = get_rawbuffer(space, w_obj) + x[0] = get_rawbuffer(space, w_obj) except TypeError: + r = rffi.cast(rffi.VOIDPP, call_local) r[0] = rffi.cast(rffi.VOIDP, get_rawobject(space, w_obj)) - x[0] = rffi.cast(rffi.VOIDP, call_local) + x[0] = rffi.cast(rffi.VOIDP, call_local) ba[capi.c_function_arg_typeoffset(space)] = self.typecode def finalize_call(self, space, w_obj, call_local): @@ -495,9 +504,13 @@ def _unwrap_object(self, space, w_obj): from pypy.module._cppyy.interp_cppyy import W_CPPClass if isinstance(w_obj, W_CPPClass): - if capi.c_is_subtype(space, w_obj.cppclass, self.clsdecl): + from pypy.module._cppyy.interp_cppyy import INSTANCE_FLAGS_IS_R_VALUE + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + # reject moves as all are explicit + raise ValueError("lvalue expected") + if 
capi.c_is_subtype(space, w_obj.clsdecl, self.clsdecl): rawobject = w_obj.get_rawobject() - offset = capi.c_base_offset(space, w_obj.cppclass, self.clsdecl, rawobject, 1) + offset = capi.c_base_offset(space, w_obj.clsdecl, self.clsdecl, rawobject, 1) obj_address = capi.direct_ptradd(rawobject, offset) return rffi.cast(capi.C_OBJECT, obj_address) raise oefmt(space.w_TypeError, @@ -518,6 +531,17 @@ x = rffi.cast(rffi.VOIDPP, address) x[0] = rffi.cast(rffi.VOIDP, self._unwrap_object(space, w_obj)) +class InstanceMoveConverter(InstanceRefConverter): + def _unwrap_object(self, space, w_obj): + # moving is same as by-ref, but have to check that move is allowed + from pypy.module._cppyy.interp_cppyy import W_CPPClass, INSTANCE_FLAGS_IS_R_VALUE + if isinstance(w_obj, W_CPPClass): + if w_obj.flags & INSTANCE_FLAGS_IS_R_VALUE: + w_obj.flags &= ~INSTANCE_FLAGS_IS_R_VALUE + return InstanceRefConverter._unwrap_object(self, space, w_obj) + raise oefmt(space.w_ValueError, "object is not an rvalue") + + class InstanceConverter(InstanceRefConverter): def convert_argument_libffi(self, space, w_obj, address, call_local): @@ -527,7 +551,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): self._is_abstract(space) @@ -548,7 +572,7 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False) + return interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False) def to_memory(self, space, w_obj, w_value, offset): address = rffi.cast(rffi.VOIDPP, self._get_raw_address(space, w_obj, offset)) @@ -582,8 +606,8 @@ def from_memory(self, space, w_obj, w_pycppclass, offset): address = rffi.cast(capi.C_OBJECT, self._get_raw_address(space, w_obj, offset)) from pypy.module._cppyy import interp_cppyy - return interp_cppyy.wrap_cppobject(space, address, self.clsdecl, - do_cast=False, is_ref=True) + return interp_cppyy.wrap_cppinstance( + space, address, self.clsdecl, do_cast=False, is_ref=True) class StdStringConverter(InstanceConverter): @@ -606,7 +630,7 @@ assign = self.clsdecl.get_overload("__assign__") from pypy.module._cppyy import interp_cppyy assign.call( - interp_cppyy.wrap_cppobject(space, address, self.clsdecl, do_cast=False), [w_value]) + interp_cppyy.wrap_cppinstance(space, address, self.clsdecl, do_cast=False), [w_value]) except Exception: InstanceConverter.to_memory(self, space, w_obj, w_value, offset) @@ -672,7 +696,7 @@ _converters = {} # builtin and custom types _a_converters = {} # array and ptr versions of above -def get_converter(space, name, default): +def get_converter(space, _name, default): # The matching of the name to a converter should follow: # 1) full, exact match # 1a) const-removed match @@ -680,9 +704,9 @@ # 3) accept ref as pointer (for the stubs, const& can be # by value, but that does not work for the ffi path) # 4) generalized cases (covers basically all user classes) - # 5) void converter, which fails on use + # 5) void* or void converter (which fails on use) - name = capi.c_resolve_name(space, name) + name = 
capi.c_resolve_name(space, _name) # 1) full, exact match try: @@ -701,7 +725,7 @@ clean_name = capi.c_resolve_name(space, helper.clean_type(name)) try: # array_index may be negative to indicate no size or no size found - array_size = helper.array_size(name) + array_size = helper.array_size(_name) # uses original arg return _a_converters[clean_name+compound](space, array_size) except KeyError: pass @@ -719,6 +743,8 @@ return InstancePtrConverter(space, clsdecl) elif compound == "&": return InstanceRefConverter(space, clsdecl) + elif compound == "&&": + return InstanceMoveConverter(space, clsdecl) elif compound == "**": return InstancePtrPtrConverter(space, clsdecl) elif compound == "": @@ -726,11 +752,13 @@ elif capi.c_is_enum(space, clean_name): return _converters['unsigned'](space, default) - # 5) void converter, which fails on use - # + # 5) void* or void converter (which fails on use) + if 0 <= compound.find('*'): + return VoidPtrConverter(space, default) # "user knows best" + # return a void converter here, so that the class can be build even - # when some types are unknown; this overload will simply fail on use - return VoidConverter(space, name) + # when some types are unknown + return VoidConverter(space, name) # fails on use _converters["bool"] = BoolConverter @@ -847,6 +875,10 @@ for name in names: _a_converters[name+'[]'] = ArrayConverter _a_converters[name+'*'] = PtrConverter + + # special case, const char* w/ size and w/o '\0' + _a_converters["const char[]"] = CStringConverterWithSize + _build_array_converters() # add another set of aliased names diff --git a/pypy/module/_cppyy/executor.py b/pypy/module/_cppyy/executor.py --- a/pypy/module/_cppyy/executor.py +++ b/pypy/module/_cppyy/executor.py @@ -159,7 +159,7 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_l(space, cppmethod, cppthis, num_args, args) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - pyres = interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + pyres = interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) return pyres def execute_libffi(self, space, cif_descr, funcaddr, buffer): @@ -167,7 +167,7 @@ result = rffi.ptradd(buffer, cif_descr.exchange_result) from pypy.module._cppyy import interp_cppyy ptr_result = rffi.cast(capi.C_OBJECT, rffi.cast(rffi.VOIDPP, result)[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) class InstancePtrPtrExecutor(InstancePtrExecutor): @@ -176,7 +176,7 @@ voidp_result = capi.c_call_r(space, cppmethod, cppthis, num_args, args) ref_address = rffi.cast(rffi.VOIDPP, voidp_result) ptr_result = rffi.cast(capi.C_OBJECT, ref_address[0]) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import FastCallNotPossible @@ -188,8 +188,8 @@ from pypy.module._cppyy import interp_cppyy long_result = capi.c_call_o(space, cppmethod, cppthis, num_args, args, self.cppclass) ptr_result = rffi.cast(capi.C_OBJECT, long_result) - return interp_cppyy.wrap_cppobject(space, ptr_result, self.cppclass, - do_cast=False, python_owns=True, fresh=True) + return interp_cppyy.wrap_cppinstance(space, ptr_result, self.cppclass, + do_cast=False, python_owns=True, fresh=True) def execute_libffi(self, space, cif_descr, funcaddr, buffer): from pypy.module._cppyy.interp_cppyy import 
FastCallNotPossible diff --git a/pypy/module/_cppyy/include/capi.h b/pypy/module/_cppyy/include/capi.h --- a/pypy/module/_cppyy/include/capi.h +++ b/pypy/module/_cppyy/include/capi.h @@ -19,14 +19,15 @@ RPY_EXTERN int cppyy_num_scopes(cppyy_scope_t parent); RPY_EXTERN - char* cppyy_scope_name(cppyy_scope_t parent, int iscope); - + char* cppyy_scope_name(cppyy_scope_t parent, cppyy_index_t iscope); RPY_EXTERN char* cppyy_resolve_name(const char* cppitem_name); RPY_EXTERN cppyy_scope_t cppyy_get_scope(const char* scope_name); RPY_EXTERN cppyy_type_t cppyy_actual_class(cppyy_type_t klass, cppyy_object_t obj); + RPY_EXTERN + size_t cppyy_size_of(cppyy_type_t klass); /* memory management ------------------------------------------------------ */ RPY_EXTERN @@ -120,6 +121,8 @@ RPY_EXTERN char* cppyy_method_name(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN + char* cppyy_method_mangled_name(cppyy_scope_t scope, cppyy_index_t idx); + RPY_EXTERN char* cppyy_method_result_type(cppyy_scope_t scope, cppyy_index_t idx); RPY_EXTERN int cppyy_method_num_args(cppyy_scope_t scope, cppyy_index_t idx); @@ -130,7 +133,9 @@ RPY_EXTERN char* cppyy_method_arg_default(cppyy_scope_t scope, cppyy_index_t idx, int arg_index); RPY_EXTERN - char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx); + char* cppyy_method_signature(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); + RPY_EXTERN + char* cppyy_method_prototype(cppyy_scope_t scope, cppyy_index_t idx, int show_formalargs); RPY_EXTERN int cppyy_method_is_template(cppyy_scope_t scope, cppyy_index_t idx); @@ -147,8 +152,12 @@ /* method properties ------------------------------------------------------ */ RPY_EXTERN + int cppyy_is_publicmethod(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_constructor(cppyy_type_t type, cppyy_index_t idx); RPY_EXTERN + int cppyy_is_destructor(cppyy_type_t type, cppyy_index_t idx); + RPY_EXTERN int cppyy_is_staticmethod(cppyy_type_t type, cppyy_index_t idx); /* data member reflection information ------------------------------------- */ diff --git a/pypy/module/_cppyy/interp_cppyy.py b/pypy/module/_cppyy/interp_cppyy.py --- a/pypy/module/_cppyy/interp_cppyy.py +++ b/pypy/module/_cppyy/interp_cppyy.py @@ -2,7 +2,7 @@ from pypy.interpreter.error import OperationError, oefmt from pypy.interpreter.gateway import interp2app, unwrap_spec -from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty, interp_attrproperty_w +from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty from pypy.interpreter.baseobjspace import W_Root from rpython.rtyper.lltypesystem import rffi, lltype, llmemory @@ -15,6 +15,10 @@ from pypy.module._cppyy import converter, executor, ffitypes, helper +INSTANCE_FLAGS_PYTHON_OWNS = 0x0001 +INSTANCE_FLAGS_IS_REF = 0x0002 +INSTANCE_FLAGS_IS_R_VALUE = 0x0004 + class FastCallNotPossible(Exception): pass @@ -33,16 +37,21 @@ class State(object): def __init__(self, space): + # final scoped name -> opaque handle self.cppscope_cache = { - "void" : W_CPPClassDecl(space, "void", capi.C_NULL_TYPE) } + 'void' : W_CPPClassDecl(space, capi.C_NULL_TYPE, 'void') } + # opaque handle -> app-level python class + self.cppclass_registry = {} + # app-level class generator callback + self.w_clgen_callback = None + # app-level function generator callback (currently not used) + self.w_fngen_callback = None + # C++11's nullptr self.w_nullptr = None - self.cpptemplate_cache = {} - self.cppclass_registry = {} - self.w_clgen_callback = None - 
self.w_fngen_callback = None def get_nullptr(space): - if hasattr(space, "fake"): + # construct a unique address that compares to NULL, serves as nullptr + if hasattr(space, 'fake'): raise NotImplementedError state = space.fromcache(State) if state.w_nullptr is None: @@ -58,52 +67,48 @@ state.w_nullptr = nullarr return state.w_nullptr - at unwrap_spec(name='text') -def resolve_name(space, name): - return space.newtext(capi.c_resolve_name(space, name)) + at unwrap_spec(scoped_name='text') +def resolve_name(space, scoped_name): + return space.newtext(capi.c_resolve_name(space, scoped_name)) - at unwrap_spec(name='text') -def scope_byname(space, name): - true_name = capi.c_resolve_name(space, name) +# memoized lookup of handles by final, scoped, name of classes/namespaces + at unwrap_spec(final_scoped_name='text') +def scope_byname(space, final_scoped_name): state = space.fromcache(State) try: - return state.cppscope_cache[true_name] + return state.cppscope_cache[final_scoped_name] except KeyError: pass - opaque_handle = capi.c_get_scope_opaque(space, true_name) + opaque_handle = capi.c_get_scope_opaque(space, final_scoped_name) assert lltype.typeOf(opaque_handle) == capi.C_SCOPE if opaque_handle: - final_name = capi.c_final_name(space, opaque_handle) - if capi.c_is_namespace(space, opaque_handle): - cppscope = W_CPPNamespaceDecl(space, final_name, opaque_handle) - elif capi.c_has_complex_hierarchy(space, opaque_handle): - cppscope = W_CPPComplexClassDecl(space, final_name, opaque_handle) + isns = capi.c_is_namespace(space, opaque_handle) + if isns: + cppscope = W_CPPNamespaceDecl(space, opaque_handle, final_scoped_name) else: - cppscope = W_CPPClassDecl(space, final_name, opaque_handle) - state.cppscope_cache[name] = cppscope + if capi.c_has_complex_hierarchy(space, opaque_handle): + cppscope = W_CPPComplexClassDecl(space, opaque_handle, final_scoped_name) + else: + cppscope = W_CPPClassDecl(space, opaque_handle, final_scoped_name) - cppscope._build_methods() - cppscope._find_datamembers() + # store in the cache to prevent recursion + state.cppscope_cache[final_scoped_name] = cppscope + + if not isns: + # build methods/data; TODO: also defer this for classes (a functional __dir__ + # and instrospection for help() is enough and allows more lazy loading) + cppscope._build_methods() + cppscope._find_datamembers() + return cppscope return None - at unwrap_spec(name='text') -def template_byname(space, name): - state = space.fromcache(State) - try: - return state.cpptemplate_cache[name] - except KeyError: - pass - - if capi.c_is_template(space, name): - cpptemplate = W_CPPTemplateType(space, name) - state.cpptemplate_cache[name] = cpptemplate - return cpptemplate - - return None + at unwrap_spec(final_scoped_name='text') +def is_template(space, final_scoped_name): + return space.newbool(capi.c_is_template(space, final_scoped_name)) def std_string_name(space): return space.newtext(capi.std_string_name) @@ -189,8 +194,13 @@ # check number of given arguments against required (== total - defaults) args_expected = len(self.arg_defs) args_given = len(args_w) - if args_expected < args_given or args_given < self.args_required: - raise oefmt(self.space.w_TypeError, "wrong number of arguments") + + if args_given < self.args_required: + raise oefmt(self.space.w_TypeError, + "takes at least %d arguments (%d given)", self.args_required, args_given) + elif args_expected < args_given: + raise oefmt(self.space.w_TypeError, + "takes at most %d arguments (%d given)", args_expected, args_given) # initial setup of 
converters, executors, and libffi (if available) if self.converters is None: @@ -376,8 +386,11 @@ conv.free_argument(self.space, rffi.cast(capi.C_OBJECT, arg_i), loc_i) capi.c_deallocate_function_args(self.space, args) - def signature(self): - return capi.c_method_signature(self.space, self.scope, self.index) + def signature(self, show_formalargs=True): + return capi.c_method_signature(self.space, self.scope, self.index, show_formalargs) + + def prototype(self, show_formalargs=True): + return capi.c_method_prototype(self.space, self.scope, self.index, show_formalargs) def priority(self): total_arg_priority = 0 @@ -391,7 +404,7 @@ lltype.free(self.cif_descr, flavor='raw') def __repr__(self): - return "CPPMethod: %s" % self.signature() + return "CPPMethod: %s" % self.prototype() def _freeze_(self): assert 0, "you should never have a pre-built instance of this!" @@ -407,7 +420,7 @@ return capi.C_NULL_OBJECT def __repr__(self): - return "CPPFunction: %s" % self.signature() + return "CPPFunction: %s" % self.prototype() class CPPTemplatedCall(CPPMethod): @@ -440,7 +453,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPTemplatedCall: %s" % self.signature() + return "CPPTemplatedCall: %s" % self.prototype() class CPPConstructor(CPPMethod): @@ -462,7 +475,7 @@ return CPPMethod.call(self, cppthis, args_w) def __repr__(self): - return "CPPConstructor: %s" % self.signature() + return "CPPConstructor: %s" % self.prototype() class CPPSetItem(CPPMethod): @@ -549,12 +562,12 @@ w_exc_type = e.w_type elif all_same_type and not e.match(self.space, w_exc_type): all_same_type = False - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' '+e.errorstr(self.space) except Exception as e: # can not special case this for non-overloaded functions as we anyway need an # OperationError error down from here - errmsg += '\n '+cppyyfunc.signature()+' =>\n' + errmsg += '\n '+cppyyfunc.prototype()+' =>\n' errmsg += ' Exception: '+str(e) if all_same_type and w_exc_type is not None: @@ -562,20 +575,20 @@ else: raise OperationError(self.space.w_TypeError, self.space.newtext(errmsg)) - def signature(self): - sig = self.functions[0].signature() + def prototype(self): + sig = self.functions[0].prototype() for i in range(1, len(self.functions)): - sig += '\n'+self.functions[i].signature() + sig += '\n'+self.functions[i].prototype() return self.space.newtext(sig) def __repr__(self): - return "W_CPPOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPOverload(%s)" % [f.prototype() for f in self.functions] W_CPPOverload.typedef = TypeDef( 'CPPOverload', is_static = interp2app(W_CPPOverload.is_static), call = interp2app(W_CPPOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPOverload.prototype), ) @@ -591,24 +604,40 @@ @jit.unroll_safe @unwrap_spec(args_w='args_w') def call(self, w_cppinstance, args_w): + # TODO: factor out the following: + if capi.c_is_abstract(self.space, self.scope.handle): + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.scope.name) w_result = W_CPPOverload.call(self, w_cppinstance, args_w) newthis = rffi.cast(capi.C_OBJECT, self.space.uint_w(w_result)) cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if cppinstance is not None: cppinstance._rawobject = newthis memory_regulator.register(cppinstance) - return w_cppinstance - return wrap_cppobject(self.space, newthis, self.functions[0].scope, - 
do_cast=False, python_owns=True, fresh=True) def __repr__(self): - return "W_CPPConstructorOverload(%s)" % [f.signature() for f in self.functions] + return "W_CPPConstructorOverload(%s)" % [f.prototype() for f in self.functions] W_CPPConstructorOverload.typedef = TypeDef( 'CPPConstructorOverload', is_static = interp2app(W_CPPConstructorOverload.is_static), call = interp2app(W_CPPConstructorOverload.call), - signature = interp2app(W_CPPOverload.signature), + prototype = interp2app(W_CPPConstructorOverload.prototype), +) + + +class W_CPPTemplateOverload(W_CPPOverload): + @unwrap_spec(args_w='args_w') + def __getitem__(self, args_w): + pass + + def __repr__(self): + return "W_CPPTemplateOverload(%s)" % [f.prototype() for f in self.functions] + +W_CPPTemplateOverload.typedef = TypeDef( + 'CPPTemplateOverload', + __getitem__ = interp2app(W_CPPTemplateOverload.call), ) @@ -622,6 +651,9 @@ def __call__(self, args_w): return self.method.bound_call(self.cppthis, args_w) + def __repr__(self): + return "W_CPPBoundMethod(%s)" % [f.prototype() for f in self.functions] + W_CPPBoundMethod.typedef = TypeDef( 'CPPBoundMethod', __call__ = interp2app(W_CPPBoundMethod.__call__), @@ -643,8 +675,8 @@ def _get_offset(self, cppinstance): if cppinstance: - assert lltype.typeOf(cppinstance.cppclass.handle) == lltype.typeOf(self.scope.handle) - offset = self.offset + cppinstance.cppclass.get_base_offset(cppinstance, self.scope) + assert lltype.typeOf(cppinstance.clsdecl.handle) == lltype.typeOf(self.scope.handle) + offset = self.offset + cppinstance.clsdecl.get_base_offset(cppinstance, self.scope) else: offset = self.offset return offset @@ -652,7 +684,7 @@ def get(self, w_cppinstance, w_pycppclass): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) return self.converter.from_memory(self.space, w_cppinstance, w_pycppclass, offset) @@ -660,7 +692,7 @@ def set(self, w_cppinstance, w_value): cppinstance = self.space.interp_w(W_CPPClass, w_cppinstance, can_be_None=True) if not cppinstance: - raise oefmt(self.space.w_ReferenceError, + raise oefmt(self.space.w_AttributeError, "attribute access requires an instance") offset = self._get_offset(cppinstance) self.converter.to_memory(self.space, w_cppinstance, w_value, offset) @@ -705,12 +737,12 @@ return space.w_False class W_CPPScopeDecl(W_Root): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] _immutable_fields_ = ['handle', 'name'] - def __init__(self, space, name, opaque_handle): + def __init__(self, space, opaque_handle, final_scoped_name): self.space = space - self.name = name + self.name = final_scoped_name assert lltype.typeOf(opaque_handle) == capi.C_SCOPE self.handle = opaque_handle self.methods = {} @@ -753,7 +785,7 @@ overload = self.get_overload(name) sig = '(%s)' % signature for f in overload.functions: - if 0 < f.signature().find(sig): + if f.signature(False) == sig: return W_CPPOverload(self.space, self, [f]) raise oefmt(self.space.w_LookupError, "no overload matches signature") @@ -769,6 +801,9 @@ # classes for inheritance. Both are python classes, though, and refactoring # may be in order at some point. 
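Further down in this diff the per-object ownership toggle is renamed from _python_owns to __python_owns__ and kept as a writable property, while by-value results from the executor changes above are wrapped with python_owns=True. A small app-level usage sketch, with MyClass standing in for any bound C++ class:

    obj = MyClass()                 # MyClass is a stand-in for any bound class
    if obj.__python_owns__:         # does Python own the underlying C++ object?
        obj.__python_owns__ = False # hand it over to C++; the proxy will no
                                    # longer destruct it when collected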
class W_CPPNamespaceDecl(W_CPPScopeDecl): + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name'] + def _make_cppfunction(self, pyname, index): num_args = capi.c_method_num_args(self.space, self, index) args_required = capi.c_method_req_args(self.space, self, index) @@ -779,9 +814,6 @@ arg_defs.append((arg_type, arg_dflt)) return CPPFunction(self.space, self, index, arg_defs, args_required) - def _build_methods(self): - pass # force lazy lookups in namespaces - def _make_datamember(self, dm_name, dm_idx): type_name = capi.c_datamember_type(self.space, self, dm_idx) offset = capi.c_datamember_offset(self.space, self, dm_idx) @@ -791,9 +823,6 @@ self.datamembers[dm_name] = datamember return datamember - def _find_datamembers(self): - pass # force lazy lookups in namespaces - def find_overload(self, meth_name): indices = capi.c_method_indices_from_name(self.space, self, meth_name) if not indices: @@ -855,18 +884,21 @@ class W_CPPClassDecl(W_CPPScopeDecl): - _attrs_ = ['space', 'name', 'handle', 'methods', 'datamembers'] - _immutable_fields_ = ['handle', 'constructor', 'methods[*]', 'datamembers[*]'] + _attrs_ = ['space', 'handle', 'name', 'methods', 'datamembers'] + _immutable_fields_ = ['handle', 'name', 'methods[*]', 'datamembers[*]'] def _build_methods(self): assert len(self.methods) == 0 methods_temp = {} for i in range(capi.c_num_methods(self.space, self)): idx = capi.c_method_index_at(self.space, self, i) - pyname = helper.map_operator_name(self.space, - capi.c_method_name(self.space, self, idx), - capi.c_method_num_args(self.space, self, idx), - capi.c_method_result_type(self.space, self, idx)) + if capi.c_is_constructor(self.space, self, idx): + pyname = '__init__' + else: + pyname = helper.map_operator_name(self.space, + capi.c_method_name(self.space, self, idx), + capi.c_method_num_args(self.space, self, idx), + capi.c_method_result_type(self.space, self, idx)) cppmethod = self._make_cppfunction(pyname, idx) methods_temp.setdefault(pyname, []).append(cppmethod) # the following covers the case where the only kind of operator[](idx) @@ -883,7 +915,7 @@ # create the overload methods from the method sets for pyname, methods in methods_temp.iteritems(): CPPMethodSort(methods).sort() - if pyname == self.name: + if pyname == '__init__': overload = W_CPPConstructorOverload(self.space, self, methods[:]) else: overload = W_CPPOverload(self.space, self, methods[:]) @@ -934,11 +966,11 @@ raise self.missing_attribute_error(name) def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return 0 def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl return cppinstance.get_rawobject() def is_namespace(self): @@ -973,13 +1005,13 @@ class W_CPPComplexClassDecl(W_CPPClassDecl): def get_base_offset(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = capi.c_base_offset(self.space, self, calling_scope, cppinstance.get_rawobject(), 1) return offset def get_cppthis(self, cppinstance, calling_scope): - assert self == cppinstance.cppclass + assert self == cppinstance.clsdecl offset = self.get_base_offset(cppinstance, calling_scope) return capi.direct_ptradd(cppinstance.get_rawobject(), offset) @@ -997,70 +1029,56 @@ W_CPPComplexClassDecl.typedef.acceptable_as_base_class = False -class W_CPPTemplateType(W_Root): - _attrs_ = ['space', 'name'] - 
_immutable_fields = ['name'] - - def __init__(self, space, name): - self.space = space - self.name = name - - @unwrap_spec(args_w='args_w') - def __call__(self, args_w): - # TODO: this is broken but unused (see pythonify.py) - fullname = "".join([self.name, '<', self.space.text_w(args_w[0]), '>']) - return scope_byname(self.space, fullname) - -W_CPPTemplateType.typedef = TypeDef( - 'CPPTemplateType', - __call__ = interp2app(W_CPPTemplateType.__call__), -) -W_CPPTemplateType.typedef.acceptable_as_base_class = False - - class W_CPPClass(W_Root): - _attrs_ = ['space', 'cppclass', '_rawobject', 'isref', 'python_owns', + _attrs_ = ['space', 'clsdecl', '_rawobject', 'flags', 'finalizer_registered'] - _immutable_fields_ = ["cppclass", "isref"] + _immutable_fields_ = ['clsdecl'] finalizer_registered = False - def __init__(self, space, cppclass, rawobject, isref, python_owns): + def __init__(self, space, decl, rawobject, isref, python_owns): self.space = space - self.cppclass = cppclass + self.clsdecl = decl assert lltype.typeOf(rawobject) == capi.C_OBJECT assert not isref or rawobject self._rawobject = rawobject assert not isref or not python_owns - self.isref = isref - self.python_owns = python_owns - self._opt_register_finalizer() + self.flags = 0 + if isref: + self.flags |= INSTANCE_FLAGS_IS_REF + if python_owns: + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() def _opt_register_finalizer(self): - if self.python_owns and not self.finalizer_registered \ - and not hasattr(self.space, "fake"): + if not self.finalizer_registered and not hasattr(self.space, "fake"): + assert self.flags & INSTANCE_FLAGS_PYTHON_OWNS self.register_finalizer(self.space) self.finalizer_registered = True def _nullcheck(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): raise oefmt(self.space.w_ReferenceError, "trying to access a NULL pointer") # allow user to determine ownership rules on a per object level def fget_python_owns(self, space): - return space.newbool(self.python_owns) + return space.newbool(bool(self.flags & INSTANCE_FLAGS_PYTHON_OWNS)) @unwrap_spec(value=bool) def fset_python_owns(self, space, value): - self.python_owns = space.is_true(value) - self._opt_register_finalizer() + if space.is_true(value): + self.flags |= INSTANCE_FLAGS_PYTHON_OWNS + self._opt_register_finalizer() + else: + self.flags &= ~INSTANCE_FLAGS_PYTHON_OWNS def get_cppthis(self, calling_scope): - return self.cppclass.get_cppthis(self, calling_scope) + return self.clsdecl.get_cppthis(self, calling_scope) def get_rawobject(self): - if not self.isref: + if not (self.flags & INSTANCE_FLAGS_IS_REF): return self._rawobject else: ptrptr = rffi.cast(rffi.VOIDPP, self._rawobject) @@ -1078,12 +1096,9 @@ return None def instance__init__(self, args_w): - if capi.c_is_abstract(self.space, self.cppclass.handle): - raise oefmt(self.space.w_TypeError, - "cannot instantiate abstract class '%s'", - self.cppclass.name) - constructor_overload = self.cppclass.get_overload(self.cppclass.name) - constructor_overload.call(self, args_w) + raise oefmt(self.space.w_TypeError, + "cannot instantiate abstract class '%s'", + self.clsdecl.name) def instance__eq__(self, w_other): # special case: if other is None, compare pointer-style @@ -1099,7 +1114,7 @@ for name in ["", "__gnu_cxx", "__1"]: nss = scope_byname(self.space, name) meth_idx = capi.c_get_global_operator( - self.space, nss, self.cppclass, other.cppclass, 
"operator==") + self.space, nss, self.clsdecl, other.clsdecl, "operator==") if meth_idx != -1: f = nss._make_cppfunction("operator==", meth_idx) ol = W_CPPOverload(self.space, nss, [f]) @@ -1118,14 +1133,15 @@ # fallback 2: direct pointer comparison (the class comparison is needed since # the first data member in a struct and the struct have the same address) other = self.space.interp_w(W_CPPClass, w_other, can_be_None=False) # TODO: factor out - iseq = (self._rawobject == other._rawobject) and (self.cppclass == other.cppclass) + iseq = (self._rawobject == other._rawobject) and (self.clsdecl == other.clsdecl) return self.space.newbool(iseq) def instance__ne__(self, w_other): return self.space.not_(self.instance__eq__(w_other)) def instance__nonzero__(self): - if not self._rawobject or (self.isref and not self.get_rawobject()): + if not self._rawobject or \ + ((self.flags & INSTANCE_FLAGS_IS_REF) and not self.get_rawobject()): return self.space.w_False return self.space.w_True @@ -1134,36 +1150,35 @@ if w_as_builtin is not None: return self.space.len(w_as_builtin) raise oefmt(self.space.w_TypeError, - "'%s' has no length", self.cppclass.name) + "'%s' has no length", self.clsdecl.name) def instance__cmp__(self, w_other): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.cmp(w_as_builtin, w_other) raise oefmt(self.space.w_AttributeError, - "'%s' has no attribute __cmp__", self.cppclass.name) + "'%s' has no attribute __cmp__", self.clsdecl.name) def instance__repr__(self): w_as_builtin = self._get_as_builtin() if w_as_builtin is not None: return self.space.repr(w_as_builtin) return self.space.newtext("<%s object at 0x%x>" % - (self.cppclass.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) + (self.clsdecl.name, rffi.cast(rffi.ULONG, self.get_rawobject()))) def destruct(self): - if self._rawobject and not self.isref: + if self._rawobject and not (self.flags & INSTANCE_FLAGS_IS_REF): memory_regulator.unregister(self) - capi.c_destruct(self.space, self.cppclass, self._rawobject) + capi.c_destruct(self.space, self.clsdecl, self._rawobject) self._rawobject = capi.C_NULL_OBJECT def _finalize_(self): - if self.python_owns: + if self.flags & INSTANCE_FLAGS_PYTHON_OWNS: self.destruct() W_CPPClass.typedef = TypeDef( 'CPPClass', - cppclass = interp_attrproperty_w('cppclass', cls=W_CPPClass), - _python_owns = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), + __python_owns__ = GetSetProperty(W_CPPClass.fget_python_owns, W_CPPClass.fset_python_owns), __init__ = interp2app(W_CPPClass.instance__init__), __eq__ = interp2app(W_CPPClass.instance__eq__), __ne__ = interp2app(W_CPPClass.instance__ne__), @@ -1220,21 +1235,21 @@ state = space.fromcache(State) return space.call_function(state.w_fngen_callback, w_callable, space.newint(npar)) -def wrap_cppobject(space, rawobject, cppclass, - do_cast=True, python_owns=False, is_ref=False, fresh=False): +def wrap_cppinstance(space, rawobject, clsdecl, + do_cast=True, python_owns=False, is_ref=False, fresh=False): rawobject = rffi.cast(capi.C_OBJECT, rawobject) # cast to actual if requested and possible w_pycppclass = None if do_cast and rawobject: - actual = capi.c_actual_class(space, cppclass, rawobject) - if actual != cppclass.handle: + actual = capi.c_actual_class(space, clsdecl, rawobject) + if actual != clsdecl.handle: try: w_pycppclass = get_pythonized_cppclass(space, actual) - offset = capi.c_base_offset1(space, actual, cppclass, rawobject, -1) + offset = capi.c_base_offset1(space, actual, clsdecl, 
rawobject, -1) rawobject = capi.direct_ptradd(rawobject, offset) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - cppclass = space.interp_w(W_CPPClassDecl, w_cppclass, can_be_None=False) + w_cppdecl = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) + clsdecl = space.interp_w(W_CPPClassDecl, w_cppdecl, can_be_None=False) except Exception: # failed to locate/build the derived class, so stick to the base (note # that only get_pythonized_cppclass is expected to raise, so none of @@ -1242,18 +1257,18 @@ pass if w_pycppclass is None: - w_pycppclass = get_pythonized_cppclass(space, cppclass.handle) + w_pycppclass = get_pythonized_cppclass(space, clsdecl.handle) # try to recycle existing object if this one is not newly created if not fresh and rawobject: obj = memory_regulator.retrieve(rawobject) - if obj is not None and obj.cppclass is cppclass: + if obj is not None and obj.clsdecl is clsdecl: return obj # fresh creation w_cppinstance = space.allocate_instance(W_CPPClass, w_pycppclass) cppinstance = space.interp_w(W_CPPClass, w_cppinstance, can_be_None=False) - cppinstance.__init__(space, cppclass, rawobject, is_ref, python_owns) + cppinstance.__init__(space, clsdecl, rawobject, is_ref, python_owns) memory_regulator.register(cppinstance) return w_cppinstance @@ -1264,7 +1279,7 @@ except TypeError: pass # attempt to get address of C++ instance - return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj)) + return rffi.cast(rffi.INTPTR_T, converter.get_rawobject(space, w_obj, False)) @unwrap_spec(w_obj=W_Root) def addressof(space, w_obj): @@ -1273,19 +1288,30 @@ return space.newlong(address) @unwrap_spec(owns=bool, cast=bool) -def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): - """Takes an address and a bound C++ class proxy, returns a bound instance.""" +def _bind_object(space, w_obj, w_clsdecl, owns=False, cast=False): try: # attempt address from array or C++ instance rawobject = rffi.cast(capi.C_OBJECT, _addressof(space, w_obj)) except Exception: # accept integer value as address rawobject = rffi.cast(capi.C_OBJECT, space.uint_w(w_obj)) - w_cppclass = space.findattr(w_pycppclass, space.newtext("__cppdecl__")) - if not w_cppclass: - w_cppclass = scope_byname(space, space.text_w(w_pycppclass)) - if not w_cppclass: + decl = space.interp_w(W_CPPClassDecl, w_clsdecl, can_be_None=False) + return wrap_cppinstance(space, rawobject, decl, python_owns=owns, do_cast=cast) + + at unwrap_spec(owns=bool, cast=bool) +def bind_object(space, w_obj, w_pycppclass, owns=False, cast=False): From pypy.commits at gmail.com Thu Nov 23 11:50:37 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 08:50:37 -0800 (PST) Subject: [pypy-commit] pypy default: refactor Message-ID: <5a16fc5d.c97e1c0a.c2665.dece@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93145:ff05ee1c4b6a Date: 2017-11-23 16:48 +0000 http://bitbucket.org/pypy/pypy/changeset/ff05ee1c4b6a/ Log: refactor diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -541,6 +541,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. 
The decoded string @@ -588,6 +592,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -621,23 +638,13 @@ builder = UnicodeBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.newunicode(builder.build()) def readline_w(self, space, w_limit=None): @@ -653,20 +660,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Thu Nov 23 12:31:06 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 09:31:06 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix multibytecodec Message-ID: <5a1705da.2785df0a.e3321.a8b3@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93146:99ca8cf9bbc4 Date: 2017-11-23 18:30 +0100 http://bitbucket.org/pypy/pypy/changeset/99ca8cf9bbc4/ Log: fix multibytecodec diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ b/pypy/module/_multibytecodec/c_codecs.py @@ -197,19 +197,21 @@ MBENC_FLUSH = 1 MBENC_RESET = 2 -def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None): +def encode(codec, unicodedata, length, errors="strict", errorcb=None, + namecb=None): encodebuf = pypy_cjk_enc_new(codec) if not encodebuf: raise MemoryError try: - return encodeex(encodebuf, unicodedata, errors, errorcb, namecb) + return encodeex(encodebuf, unicodedata, length, errors, errorcb, namecb) finally: pypy_cjk_enc_free(encodebuf) -def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, +def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None, namecb=None, ignore_error=0): - inleft = len(unicodedata) - with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf: + inleft = length + inbuf = rffi.utf82wcharp(utf8data, length) + try: if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0: raise MemoryError if ignore_error == 0: @@ -221,16 +223,18 @@ if r == 0 or r == ignore_error: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) while flags & MBENC_RESET: r = pypy_cjk_enc_reset(encodebuf) if r == 0: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) src = pypy_cjk_enc_outbuf(encodebuf) length = pypy_cjk_enc_outlen(encodebuf) return rffi.charpsize2str(src, length) + finally: + lltype.free(inbuf, flavor='raw') def multibytecodec_encerror(encodebuf, e, errors, errorcb, namecb, unicodedata): @@ -256,21 +260,16 @@ elif errors == "replace": codec = pypy_cjk_enc_getcodec(encodebuf) 
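For reference, the c_codecs encode()/encodeex() helpers change signature in this patch: they now take the text as utf8 bytes plus an explicit codepoint count (the buffer is converted with rffi.utf82wcharp), so the length argument counts characters rather than bytes. A usage sketch lifted from the updated tests further down:

    from pypy.module._multibytecodec.c_codecs import getcodec, encode
    c = getcodec("hz")
    assert encode(c, "foobar", 6) == "foobar"
    assert encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) == '~{abc}~}'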
try: - replace = encode(codec, u"?") + replace = encode(codec, "?", 1) except EncodeDecodeError: replace = "?" else: assert errorcb - XXX - retu, rets, end = errorcb(errors, namecb, reason, - unicodedata.encode("utf8"), start, end) - if rets is not None: - # py3k only - replace = rets - else: - assert retu is not None - codec = pypy_cjk_enc_getcodec(encodebuf) - replace = encode(codec, retu, "strict", errorcb, namecb) + rets, end = errorcb(errors, namecb, reason, + unicodedata, start, end) + codec = pypy_cjk_enc_getcodec(encodebuf) + lgt, _ = rutf8.get_utf8_length_flag(rets) + replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) if r == MBERR_NOMEMORY: diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype +from rpython.rlib import rutf8 from pypy.module._multibytecodec import c_codecs from pypy.module._multibytecodec.interp_multibytecodec import ( MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror, @@ -65,7 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - return space.newunicode(output) + lgt, flag = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt, flag) @unwrap_spec(errors="text_or_none") @@ -88,7 +90,8 @@ def _initialize(self): self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec) - self.pending = u"" + self.pending = "" + self.pending_len = 0 def _free(self): self.pending = None @@ -96,25 +99,37 @@ c_codecs.pypy_cjk_enc_free(self.encodebuf) self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO) - @unwrap_spec(object='utf8', final=bool) - def encode_w(self, object, final=False): - u_object = object.decode('utf8') + @unwrap_spec(final=bool) + def encode_w(self, space, w_object, final=False): + utf8data, length = space.utf8_len_w(w_object) space = self.space state = space.fromcache(CodecState) if len(self.pending) > 0: - u_object = self.pending + u_object + utf8data = self.pending + utf8data + length += self.pending_len try: - output = c_codecs.encodeex(self.encodebuf, u_object, self.errors, + output = c_codecs.encodeex(self.encodebuf, utf8data, length, + self.errors, state.encode_error_handler, self.name, get_ignore_error(final)) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, object, len(u_object), + raise wrap_unicodeencodeerror(space, e, utf8data, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf) - assert 0 <= pos <= len(u_object) - self.pending = u_object[pos:] + assert 0 <= pos <= length + # scan the utf8 string until we hit pos + i = 0 + stop = length - pos + self.pending_len = stop + if stop > 0: + while pos > 0: + i = rutf8.next_codepoint_pos(utf8data, i) + pos -= 1 + self.pending = utf8data[i:] + else: + self.pending = "" return space.newbytes(output) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -31,23 +31,23 @@ return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) - 
@unwrap_spec(input='utf8', errors="text_or_none") - def encode(self, space, input, errors=None): + @unwrap_spec(errors="text_or_none") + def encode(self, space, w_input, errors=None): if errors is None: errors = 'strict' state = space.fromcache(CodecState) + input, length = space.utf8_len_w(w_input) # - u_input = input.decode('utf8') try: - output = c_codecs.encode(self.codec, u_input, errors, + output = c_codecs.encode(self.codec, input, length, errors, state.encode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, input, len(u_input), + raise wrap_unicodeencodeerror(space, e, input, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) return space.newtuple([space.newbytes(output), - space.newint(len(u_input))]) + space.newint(length)]) MultibyteCodec.typedef = TypeDef( diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -14,27 +14,27 @@ def test_decode_gbk(): c = getcodec("gbk") u = decode(c, "\xA1\xAA") - assert u == unichr(0x2014) + assert u == unichr(0x2014).encode('utf8') u = decode(c, "foobar") - assert u == u"foobar" + assert u == "foobar" def test_decode_hz(): # stateful c = getcodec("hz") u = decode(c, "~{abc}") - assert u == u'\u5f95\u6cef' + assert u == u'\u5f95\u6cef'.encode('utf8') u = decode(c, "~{") - assert u == u'' + assert u == '' def test_decodeex_hz(): c = getcodec("hz") decodebuf = c_codecs.pypy_cjk_dec_new(c) u = c_codecs.decodeex(decodebuf, "~{abcd~}") - assert u == u'\u5f95\u6c85' + assert u == u'\u5f95\u6c85'.encode('utf8') u = c_codecs.decodeex(decodebuf, "~{efgh~}") - assert u == u'\u5f50\u73b7' + assert u == u'\u5f50\u73b7'.encode('utf8') u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh") - assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7' + assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'.encode('utf8') c_codecs.pypy_cjk_dec_free(decodebuf) def test_decodeex_hz_incomplete(): @@ -64,7 +64,7 @@ buf += c u = c_codecs.decodeex(decodebuf, buf, ignore_error = c_codecs.MBERR_TOOFEW) - assert u == output + assert u == output.encode('utf8') incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf) buf = buf[incompletepos:] assert buf == '' @@ -86,46 +86,47 @@ def test_decode_hz_ignore(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'ignore') - assert u == u'def\u5fcf' + assert u == u'def\u5fcf'.encode('utf8') def test_decode_hz_replace(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'replace') - assert u == u'def\ufffd\u5fcf' + assert u == u'def\ufffd\u5fcf'.encode('utf8') def test_encode_hz(): c = getcodec("hz") - s = encode(c, u'foobar') + s = encode(c, u'foobar'.encode('utf8'), 6) assert s == 'foobar' and type(s) is str - s = encode(c, u'\u5f95\u6cef') + s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) assert s == '~{abc}~}' def test_encode_hz_error(): # error c = getcodec("hz") - e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value + e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def'.encode('utf8'), 7).value assert e.start == 3 assert e.end == 4 assert e.reason == "illegal multibyte sequence" def test_encode_hz_ignore(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'ignore') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'ignore') assert s == 'abcdef' def test_encode_hz_replace(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'replace') + s = encode(c, 
u'abc\u1234def'.encode('utf8'), 7, 'replace') assert s == 'abc?def' def test_encode_jisx0208(): c = getcodec('iso2022_jp') - s = encode(c, u'\u83ca\u5730\u6642\u592b') + s = encode(c, u'\u83ca\u5730\u6642\u592b'.encode('utf8'), 4) assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str def test_encode_custom_error_handler_bytes(): + py.test.skip("needs revamping in py3k") c = getcodec("hz") def errorhandler(errors, enc, msg, t, startingpos, endingpos): - return None, '\xc3', endingpos - s = encode(c, u'abc\u1234def', 'foo', errorhandler) + return u'\xc3'.encode('utf8'), endingpos + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler) assert '\xc3' in s diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -1,6 +1,7 @@ from pypy.module._multibytecodec import c_codecs from rpython.translator.c.test import test_standalone from rpython.config.translationoption import get_combined_translation_config +from rpython.rlib import rutf8 class TestTranslation(test_standalone.StandaloneTests): @@ -13,7 +14,8 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - r = c_codecs.encode(c, u) + lgt, _ = rutf8.get_utf8_length_flag(u) + r = c_codecs.encode(c, u, lgt) print r return 0 # From pypy.commits at gmail.com Thu Nov 23 12:55:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 09:55:56 -0800 (PST) Subject: [pypy-commit] pypy default: Simplify _find_line_ending() and fix logic in the case of embedded \r and self.readnl=='\r\n' Message-ID: <5a170bac.8fb1df0a.eb254.5f3f@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93147:8369cd92f7d0 Date: 2017-11-23 17:52 +0000 http://bitbucket.org/pypy/pypy/changeset/8369cd92f7d0/ Log: Simplify _find_line_ending() and fix logic in the case of embedded \r and self.readnl=='\r\n' diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -216,14 +216,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -242,16 +235,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' else: # Non-universal mode. 
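The hunk below is the fix named in the log message: in non-universal mode the old code fell back to searching for just readnl[0], so with readnl == '\r\n' an embedded lone '\r' was treated as the possible start of a line ending. The replacement scans for the complete readnl sequence and only leaves the last len(readnl)-1 positions unscanned, since those may still hold a split ending. A standalone sketch of the same scan, for illustration only:

    def find_newline(line, start, end, readnl):
        # mirrors the new non-universal branch: (pos-after-ending, 0) on a
        # match, (-1, end_scan) when no complete readnl was found
        end_scan = end - len(readnl) + 1
        for i in range(start, end_scan):
            if line[i] == readnl[0] and line[i:i + len(readnl)] == readnl:
                return i + len(readnl), 0
        return -1, end_scan

    find_newline(u'a\rb!', 0, 4, u'\r\n')    # -> (-1, 3): the lone '\r' is skipped
    find_newline(u'a\rb\r\n', 0, 5, u'\r\n') # -> (5, 0)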
- pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( From pypy.commits at gmail.com Thu Nov 23 13:02:57 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 23 Nov 2017 10:02:57 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: one part of interp_sre Message-ID: <5a170d51.e1acdf0a.beeec.9bf2@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93148:5a057586add0 Date: 2017-11-23 19:02 +0100 http://bitbucket.org/pypy/pypy/changeset/5a057586add0/ Log: one part of interp_sre diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -7,7 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # @@ -237,8 +238,8 @@ filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.unicode_w(w_ptemplate) - literal = u'\\' not in filter_as_unicode + filter_as_unicode = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: @@ -267,7 +268,7 @@ sublist_w = strbuilder = unicodebuilder = None if use_builder: if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) + unicodebuilder = Utf8StringBuilder(ctx.end) else: assert filter_as_string is not None strbuilder = StringBuilder(ctx.end) @@ -335,7 +336,9 @@ return space.newbytes(strbuilder.build()), n else: assert unicodebuilder is not None - return space.newunicode(unicodebuilder.build()), n + return space.newutf8(unicodebuilder.build(), + unicodebuilder.get_length(), + unicodebuilder.get_flag()), n else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newunicode(u'') From pypy.commits at gmail.com Thu Nov 23 13:11:59 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 23 Nov 2017 10:11:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a170f6f.cc5e1c0a.87e67.c5b7@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93149:0797bb6394b6 Date: 2017-11-23 18:07 +0000 http://bitbucket.org/pypy/pypy/changeset/0797bb6394b6/ Log: hg merge default diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -223,14 +223,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. 
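A side note on the utf8 plumbing this branch relies on (visible in the multibytecodec hunks above and the sre hunk below): unicode results are carried as utf8 byte strings and handed to the object space together with their codepoint length and a flag, either computed explicitly with rutf8.get_utf8_length_flag() or tracked by Utf8StringBuilder. Roughly:

    from rpython.rlib import rutf8
    utf8 = u'\u5f95\u6cef'.encode('utf8')
    lgt, flag = rutf8.get_utf8_length_flag(utf8)  # codepoint count + ascii/utf8 flag
    w_str = space.newutf8(utf8, lgt, flag)        # 'space' is the interp-level
                                                  # object space, as in the diffs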
Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -249,16 +242,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = '\n' else: # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -548,6 +547,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string @@ -595,6 +598,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -628,23 +644,13 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): @@ -660,20 +666,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Thu Nov 23 15:05:45 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:05:45 -0800 (PST) Subject: [pypy-commit] pypy default: fix test use of eci for vmprof_start_sampling, vmprof_start_sampling Message-ID: <5a172a19.03da1c0a.d5476.8ad6@mx.google.com> Author: Matti Picus Branch: Changeset: r93151:72001f56a97f Date: 2017-11-23 20:28 +0200 http://bitbucket.org/pypy/pypy/changeset/72001f56a97f/ Log: fix test use of eci for vmprof_start_sampling, vmprof_start_sampling diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -133,11 +134,17 @@ #endif """]) +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + 
_eci = auto_eci + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=auto_eci, + rffi.INT, compilation_info=_eci, _nowrapper=True) vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=auto_eci, + lltype.Void, compilation_info=_eci, _nowrapper=True) From pypy.commits at gmail.com Thu Nov 23 15:05:42 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:05:42 -0800 (PST) Subject: [pypy-commit] pypy default: cannot pip install vmprof on arm, s390x Message-ID: <5a172a16.21b9df0a.dcef.f631@mx.google.com> Author: Matti Picus Branch: Changeset: r93150:8c42f0f755c0 Date: 2017-11-23 18:48 +0200 http://bitbucket.org/pypy/pypy/changeset/8c42f0f755c0/ Log: cannot pip install vmprof on arm, s390x diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis From pypy.commits at gmail.com Thu Nov 23 15:08:55 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 12:08:55 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5a172ad7.d58bdf0a.8cc33.c5c9@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93152:ce6402cbdf3c Date: 2017-11-23 22:08 +0200 http://bitbucket.org/pypy/pypy/changeset/ce6402cbdf3c/ Log: merge default into py3.5 diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -184,9 +184,7 @@ start, end ) - if endpos >= 0: - endpos += start - else: + if endpos < 0: endpos = end assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -216,44 +216,41 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - - # Newlines are already translated, only search for \n - pos = line.find(u'\n', start, end) - if pos >= 0: - return pos - start + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces - i = 0 + i = start while True: - # Fast path for non-control chars. The loop always ends - # since the Py_UNICODE storage is NUL-terminated. - while i < size and line[start + i] > '\r': + # Fast path for non-control chars. + while i < end and line[i] > '\r': i += 1 - if i >= size: + if i >= end: return -1, size - ch = line[start + i] + ch = line[i] i += 1 if ch == '\n': return i, 0 if ch == '\r': - if line[start + i] == '\n': + if line[i] == '\n': return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' else: # Non-universal mode. 
- pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos - start + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -549,8 +546,13 @@ # _____________________________________________________________ # read methods - def _set_decoded_chars(self, chars): - self.decoded_chars = chars + def _unset_decoded(self): + self.decoded_chars = None + self.decoded_chars_used = 0 + + def _set_decoded(self, space, w_decoded): + check_decoded(space, w_decoded) + self.decoded_chars = space.unicode_w(w_decoded) self.decoded_chars_used = 0 def _get_decoded_chars(self, size): @@ -574,6 +576,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string @@ -616,8 +622,7 @@ eof = input_buf.getlength() == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -629,6 +634,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -662,23 +680,13 @@ builder = UnicodeBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.newunicode(builder.build()) def readline_w(self, space, w_limit=None): @@ -687,28 +695,16 @@ self._writeflush(space) limit = convert_size(space, w_limit) - chunked = 0 line = None remaining = None - chunks = [] + builder = UnicodeBuilder() while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._set_decoded_chars(None) - self.snapshot = None start = endpos = offset_to_buffer = 0 break @@ -725,8 +721,8 @@ line_len = len(line) endpos, consumed = self._find_line_ending(line, start, line_len) + chunked = builder.getlength() if endpos >= 0: - endpos += start if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 @@ -744,15 +740,15 @@ # No line ending seen yet - put aside current data if endpos > start: s = line[start:endpos] - chunks.append(s) - chunked += len(s) + builder.append(s) 
+ # There may be some remaining bytes we'll have to prepend to the # next chunk of data if endpos < line_len: remaining = line[endpos:] line = None # We have consumed the buffer - self._set_decoded_chars(None) + self._unset_decoded() if line: # Our line ends in the current buffer @@ -761,18 +757,12 @@ self.decoded_chars_used = decoded_chars_used if start > 0 or endpos < len(line): line = line[start:endpos] - if remaining: - chunks.append(remaining) - remaining = None - if chunks: - if line: - chunks.append(line) - line = u''.join(chunks) + builder.append(line) + elif remaining: + builder.append(remaining) - if line: - return space.newunicode(line) - else: - return space.newunicode(u'') + result = builder.build() + return space.newunicode(result) # _____________________________________________________________ # write methods @@ -913,7 +903,7 @@ self._unsupportedoperation( space, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -943,7 +933,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._set_decoded_chars(None) + self._unset_decoded() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -964,8 +954,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - check_decoded(space, w_decoded) - self._set_decoded_chars(space.unicode_w(w_decoded)) + self._set_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters if len(self.decoded_chars) < cookie.chars_to_skip: @@ -1034,7 +1023,7 @@ w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(input[i])) check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.bytes_to_feed += 1 diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,6 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib import rvmprof from rpython.rlib.rvmprof import cintf DEBUG = False @@ -41,13 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - rvmprof.stop_sampling() x = cintf.save_rvmprof_stack() try: h = self._gcrootfinder.switch(stacklet) finally: cintf.restore_rvmprof_stack(x) - rvmprof.start_sampling() if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,8 +56,10 @@ return None def stop_sampling(): - fd = _get_vmprof().cintf.vmprof_stop_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling + fd = vmprof_stop_sampling() return rffi.cast(lltype.Signed, fd) def start_sampling(): - _get_vmprof().cintf.vmprof_start_sampling() + from rpython.rlib.rvmprof.cintf import vmprof_start_sampling + vmprof_start_sampling() diff --git 
a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -40,7 +41,7 @@ compile_extra += ['-DVMPROF_UNIX'] compile_extra += ['-DVMPROF_LINUX'] elif sys.platform == 'win32': - compile_extra = ['-DRPYTHON_VMPROF', '-DVMPROF_WINDOWS'] + compile_extra += ['-DVMPROF_WINDOWS'] separate_module_files = [SHARED.join('vmprof_win.c')] _libs = [] else: @@ -120,16 +121,32 @@ vmprof_get_profile_path = rffi.llexternal("vmprof_get_profile_path", [rffi.CCHARP, lltype.Signed], lltype.Signed, compilation_info=eci, _nowrapper=True) - vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=eci, - _nowrapper=True) - vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=eci, - _nowrapper=True) return CInterface(locals()) +# this is always present, but compiles to no-op if RPYTHON_VMPROF is not +# defined (i.e. if we don't actually use vmprof in the generated C) +auto_eci = ExternalCompilationInfo(post_include_bits=[""" +#ifndef RPYTHON_VMPROF +# define vmprof_stop_sampling() (-1) +# define vmprof_start_sampling() ((void)0) +#endif +"""]) + +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + _eci = auto_eci + +vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=_eci, + _nowrapper=True) +vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=_eci, + _nowrapper=True) + class CInterface(object): def __init__(self, namespace): @@ -218,6 +235,7 @@ # stacklet support def save_rvmprof_stack(): + vmprof_stop_sampling() return vmprof_tl_stack.get_or_make_raw() def empty_rvmprof_stack(): @@ -225,6 +243,7 @@ def restore_rvmprof_stack(x): vmprof_tl_stack.setraw(x) + vmprof_start_sampling() # # traceback support diff --git a/rpython/rlib/rvmprof/src/rvmprof.c b/rpython/rlib/rvmprof/src/rvmprof.c --- a/rpython/rlib/rvmprof/src/rvmprof.c +++ b/rpython/rlib/rvmprof/src/rvmprof.c @@ -12,6 +12,7 @@ #endif +#include "vmprof_common.h" #include "shared/vmprof_get_custom_offset.h" #ifdef VMPROF_UNIX @@ -30,7 +31,7 @@ } #endif -long vmprof_get_profile_path(const char * buffer, long size) +long vmprof_get_profile_path(char * buffer, long size) { return vmp_fd_to_path(vmp_profile_fileno(), buffer, size); } diff --git a/rpython/rlib/rvmprof/src/rvmprof.h b/rpython/rlib/rvmprof/src/rvmprof.h --- a/rpython/rlib/rvmprof/src/rvmprof.h +++ b/rpython/rlib/rvmprof/src/rvmprof.h @@ -36,8 +36,8 @@ RPY_EXTERN int vmprof_stack_append(void*, long); RPY_EXTERN long vmprof_stack_pop(void*); RPY_EXTERN void vmprof_stack_free(void*); -RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, intptr_t*, intptr_t); -RPY_EXTERN long vmprof_get_profile_path(const char *, long); +RPY_EXTERN intptr_t vmprof_get_traceback(void *, void *, void**, intptr_t); +RPY_EXTERN long vmprof_get_profile_path(char *, long); RPY_EXTERN int vmprof_stop_sampling(void); RPY_EXTERN void vmprof_start_sampling(void); diff --git a/rpython/rlib/rvmprof/src/shared/vmp_stack.c b/rpython/rlib/rvmprof/src/shared/vmp_stack.c --- a/rpython/rlib/rvmprof/src/shared/vmp_stack.c +++ 
b/rpython/rlib/rvmprof/src/shared/vmp_stack.c @@ -262,7 +262,7 @@ } int depth = 0; - PY_STACK_FRAME_T * top_most_frame = frame; + //PY_STACK_FRAME_T * top_most_frame = frame; while ((depth + _per_loop()) <= max_depth) { unw_get_proc_info(&cursor, &pip); @@ -400,7 +400,7 @@ if (fd == NULL) { return 0; } - char * saveptr; + char * saveptr = NULL; char * line = NULL; char * he = NULL; char * name; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.c b/rpython/rlib/rvmprof/src/shared/vmprof_common.c --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.c +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.c @@ -4,6 +4,9 @@ #include #ifdef RPYTHON_VMPROF + +int get_stack_trace(PY_THREAD_STATE_T * current, void** result, int max_depth, intptr_t pc); + #ifdef RPYTHON_LL2CTYPES /* only for testing: ll2ctypes sets RPY_EXTERN from the command-line */ @@ -193,7 +196,7 @@ #endif intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length) + void **result_p, intptr_t result_length) { int n; int enabled; diff --git a/rpython/rlib/rvmprof/src/shared/vmprof_common.h b/rpython/rlib/rvmprof/src/shared/vmprof_common.h --- a/rpython/rlib/rvmprof/src/shared/vmprof_common.h +++ b/rpython/rlib/rvmprof/src/shared/vmprof_common.h @@ -96,7 +96,7 @@ #endif RPY_EXTERN intptr_t vmprof_get_traceback(void *stack, void *ucontext, - intptr_t *result_p, intptr_t result_length); + void **result_p, intptr_t result_length); #endif int vmprof_get_signal_type(void); diff --git a/rpython/rlib/rvmprof/test/test_rvmprof.py b/rpython/rlib/rvmprof/test/test_rvmprof.py --- a/rpython/rlib/rvmprof/test/test_rvmprof.py +++ b/rpython/rlib/rvmprof/test/test_rvmprof.py @@ -144,7 +144,8 @@ @pytest.fixture def init(self, tmpdir): - eci = ExternalCompilationInfo(compile_extra=['-g','-O0'], + eci = ExternalCompilationInfo(compile_extra=['-g','-O0', '-Werror'], + post_include_bits = ['int native_func(int);'], separate_module_sources=[""" RPY_EXTERN int native_func(int d) { int j = 0; From pypy.commits at gmail.com Fri Nov 24 02:18:36 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 23:18:36 -0800 (PST) Subject: [pypy-commit] pypy default: generate conf.h for tests Message-ID: <5a17c7cc.52bf1c0a.cd6cb.ec74@mx.google.com> Author: Matti Picus Branch: Changeset: r93153:d7c94a4970dd Date: 2017-11-24 09:16 +0200 http://bitbucket.org/pypy/pypy/changeset/d7c94a4970dd/ Log: generate conf.h for tests diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() From pypy.commits at gmail.com Fri Nov 24 02:18:38 2017 From: pypy.commits at gmail.com (mattip) Date: Thu, 23 Nov 2017 23:18:38 -0800 (PST) Subject: [pypy-commit] pypy py3.5: merge default into py3.5 Message-ID: <5a17c7ce.07d81c0a.b6ab9.f4a1@mx.google.com> Author: Matti Picus Branch: py3.5 Changeset: r93154:d2807ddb8178 Date: 2017-11-24 09:17 +0200 http://bitbucket.org/pypy/pypy/changeset/d2807ddb8178/ Log: merge default into py3.5 diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if 
sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() From pypy.commits at gmail.com Fri Nov 24 04:04:41 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:04:41 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start working on pypyjson Message-ID: <5a17e0a9.06b7df0a.2eba1.5b54@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93155:109fd5f5d4eb Date: 2017-11-23 20:52 +0100 http://bitbucket.org/pypy/pypy/changeset/109fd5f5d4eb/ Log: start working on pypyjson diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,10 +1760,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - def unicode_w(self, w_obj): - # XXX: kill me! - return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -1,7 +1,7 @@ import sys from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize, always_inline, r_dict -from rpython.rlib import rfloat, runicode +from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -19,29 +19,6 @@ return 0.0 return x * NEG_POW_10[exp] -def strslice2unicode_latin1(s, start, end): - """ - Convert s[start:end] to unicode. s is supposed to be an RPython string - encoded in latin-1, which means that the numeric value of each char is the - same as the corresponding unicode code point. - - Internally it's implemented at the level of low-level helpers, to avoid - the extra copy we would need if we take the actual slice first. - - No bound checking is done, use carefully. 
- """ - from rpython.rtyper.annlowlevel import llstr, hlunicode - from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE - from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar - length = end-start - ll_s = llstr(s) - ll_res = malloc(UNICODE, length) - ll_res.hash = 0 - for i in range(length): - ch = ll_s.chars[start+i] - ll_res.chars[i] = cast_primitive(UniChar, ch) - return hlunicode(ll_res) - def slice_eq(a, b): (ll_chars1, start1, length1, _) = a (ll_chars2, start2, length2, _) = b @@ -312,8 +289,7 @@ bits |= ord(ch) if ch == '"': self.pos = i - return self.space.newunicode( - self._create_string(start, i - 1, bits)) + return self._create_string(start, i - 1, bits) elif ch == '\\' or ch < '\x20': self.pos = i-1 return self.decode_string_escaped(start) @@ -322,12 +298,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - return unicodehelper.decode_utf8(self.space, content_utf8) + lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) + return self.space.newutf8(content_utf8, lgt, flag) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) - return strslice2unicode_latin1(self.s, start, end) + return self.space.newutf8(self.getslice(start, end), + end - start, rutf8.FLAG_ASCII) def decode_string_escaped(self, start): i = self.pos @@ -340,9 +319,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + lgt, f = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) self.pos = i - return self.space.newunicode(content_unicode) + return self.space.newutf8(content_utf8, lgt, f) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -10,10 +10,14 @@ assert dec.skip_whitespace(8) == len(s) dec.close() +class FakeSpace(object): + def newutf8(self, s, l, f): + return s + def test_decode_key(): s1 = "123" * 100 s = ' "%s" "%s" ' % (s1, s1) - dec = JSONDecoder('fake space', s) + dec = JSONDecoder(FakeSpace(), s) assert dec.pos == 0 x = dec.decode_key(0) assert x == s1 diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,23 +367,10 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) - def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Fri Nov 24 04:04:43 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:04:43 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: merge Message-ID: <5a17e0ab.8dc1df0a.ef7c5.6dd7@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93156:8fac293591e9 Date: 2017-11-24 10:04 +0100 http://bitbucket.org/pypy/pypy/changeset/8fac293591e9/ Log: merge diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -223,14 +223,7 @@ def _find_line_ending(self, line, start, end): size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: + if self.readuniversal: # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces i = start @@ -249,16 +242,22 @@ return i + 1, 0 else: return i, 0 + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = '\n' else: # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size + newline = self.readnl + end_scan = end - len(newline) + 1 + for i in range(start, end_scan): + ch = line[i] + if ch == newline[0]: + for j in range(1, len(newline)): + if line[i + j] != newline[j]: + break + else: + return i + len(newline), 0 + return -1, end_scan W_TextIOBase.typedef = TypeDef( @@ -548,6 +547,10 @@ self.decoded_chars_used += size return chars + def _has_data(self): + return (self.decoded_chars is not None and + self.decoded_chars_used < len(self.decoded_chars)) + def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. 
The decoded string @@ -595,6 +598,19 @@ return not eof + def _ensure_data(self, space): + while not self._has_data(): + try: + if not self._read_chunk(space): + self._unset_decoded() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -628,23 +644,13 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: + while remaining > 0: + if not self._ensure_data(space): + break data = self._get_decoded_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break - - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - return space.new_from_utf8(builder.build()) def readline_w(self, space, w_limit=None): @@ -660,20 +666,9 @@ while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None start = endpos = offset_to_buffer = 0 break From pypy.commits at gmail.com Fri Nov 24 04:53:49 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 01:53:49 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix _ssl module Message-ID: <5a17ec2d.dc361c0a.1637d.6dbf@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93157:8a24f68050df Date: 2017-11-24 10:53 +0100 http://bitbucket.org/pypy/pypy/changeset/8a24f68050df/ Log: fix _ssl module diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -1566,12 +1566,13 @@ cadata = space.bufferstr_w(w_cadata) else: ca_file_type = SSL_FILETYPE_PEM - try: - cadata = space.unicode_w(w_cadata).encode('ascii') - except UnicodeEncodeError: + w_uni = space.convert_arg_to_w_unicode(w_cadata) + if not w_uni.is_ascii(): raise oefmt(space.w_TypeError, "cadata should be a ASCII string or a " "bytes-like object") + cadata = space.utf8_w(w_uni) + if cafile is None and capath is None and cadata is None: raise oefmt(space.w_TypeError, "cafile and capath cannot be both omitted") From pypy.commits at gmail.com Fri Nov 24 05:16:59 2017 From: pypy.commits at gmail.com (fijal) Date: Fri, 24 Nov 2017 02:16:59 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: start fixing _rawffi Message-ID: <5a17f19b.f3c4df0a.91aaa.bb7f@mx.google.com> Author: fijal Branch: unicode-utf8 Changeset: r93158:467a32f09dd6 Date: 2017-11-24 11:16 +0100 http://bitbucket.org/pypy/pypy/changeset/467a32f09dd6/ Log: start fixing _rawffi diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -167,8 +167,8 @@ addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): - buf = rffi.unicode2wcharp(unicodeval) + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): + buf = rffi.utf82wcharp(utf8val, utf8len) self.w_func.to_free.append(rffi.cast(rffi.VOIDP, buf)) addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) diff --git a/pypy/module/_rawffi/alt/test/test_type_converter.py 
b/pypy/module/_rawffi/alt/test/test_type_converter.py --- a/pypy/module/_rawffi/alt/test/test_type_converter.py +++ b/pypy/module/_rawffi/alt/test/test_type_converter.py @@ -6,7 +6,7 @@ class DummyFromAppLevelConverter(FromAppLevelConverter): - def handle_all(self, w_ffitype, w_obj, val): + def handle_all(self, w_ffitype, w_obj, val, lgt=None): self.lastval = val handle_signed = handle_all @@ -120,8 +120,8 @@ def test_strings(self): # first, try automatic conversion from applevel self.check(app_types.char_p, self.space.newbytes('foo'), 'foo') - self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234') - self.check(app_types.unichar_p, self.space.wrap('foo'), u'foo') + self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234'.encode('utf8')) + self.check(app_types.unichar_p, self.space.wrap('foo'), 'foo') # then, try to pass explicit pointers self.check(app_types.char_p, self.space.wrap(42), 42) self.check(app_types.unichar_p, self.space.wrap(42), 42) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -1,6 +1,6 @@ from rpython.rlib import libffi -from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import jit, rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.error import oefmt from pypy.module._rawffi.structure import W_StructureInstance, W_Structure from pypy.module._rawffi.alt.interp_ffitype import app_types @@ -85,8 +85,8 @@ return True elif w_ffitype.is_unichar_p() and (w_type is self.space.w_bytes or w_type is self.space.w_unicode): - unicodeval = self.space.unicode_w(w_obj) - self.handle_unichar_p(w_ffitype, w_obj, unicodeval) + utf8, lgt = self.space.utf8_len_w(w_obj) + self.handle_unichar_p(w_ffitype, w_obj, utf8, lgt) return True return False @@ -147,7 +147,7 @@ """ self.error(w_ffitype, w_obj) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): """ unicodeval: interp-level unicode """ @@ -228,7 +228,8 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newunicode(unichr(wcharval)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, + rutf8.get_flag_from_code(intmask(wcharval))) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -10,6 +10,7 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rffi_platform from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize import rpython.rlib.rposix as rposix @@ -416,13 +417,13 @@ val = s[0] push_func(add_arg, argdesc, val) elif letter == 'u': - s = space.unicode_w(w_arg) - if len(s) != 1: + s, lgt = space.utf8_len_w(w_arg) + if lgt != 1: raise oefmt(space.w_TypeError, "Expected unicode string of length one as wide " "character") - val = s[0] - push_func(add_arg, argdesc, val) + val = rutf8.codepoint_at_pos(s, 0) + push_func(add_arg, argdesc, rffi.cast(rffi.WCHAR_T, val)) else: for c in unroll_letters_for_numbers: if letter == c: diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py --- 
a/rpython/rtyper/lltypesystem/rffi.py +++ b/rpython/rtyper/lltypesystem/rffi.py @@ -1024,13 +1024,14 @@ def utf82wcharp(utf8, utf8len): from rpython.rlib import rutf8 - w = lltype.malloc(CWCHARP.TO, utf8len, flavor='raw') + w = lltype.malloc(CWCHARP.TO, utf8len + 1, flavor='raw') i = 0 index = 0 while i < len(utf8): w[index] = unichr(rutf8.codepoint_at_pos(utf8, i)) i = rutf8.next_codepoint_pos(utf8, i) index += 1 + w[index] = unichr(0) return w # char** From pypy.commits at gmail.com Fri Nov 24 06:50:51 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 24 Nov 2017 03:50:51 -0800 (PST) Subject: [pypy-commit] buildbot default: not needed, virtualenv is deleted by "hg purge" Message-ID: <5a18079b.42e61c0a.73c2e.9727@mx.google.com> Author: Matti Picus Branch: Changeset: r1042:0a18cb374a4e Date: 2017-11-24 13:49 +0200 http://bitbucket.org/pypy/buildbot/changeset/0a18cb374a4e/ Log: not needed, virtualenv is deleted by "hg purge" diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -563,14 +563,8 @@ if platform == 'win32': self.virt_python = r'virt_test\Scripts\python.exe' - clean = 'rmdir /s /q virt-test' else: self.virt_python = 'virt_test/bin/python' - clean = 'rm -rf virt-test' - self.addStep(ShellCmd( - description="clean old virtualenv", - command=clean, - haltOnFailure=False)) self.addStep(ShellCmd( description="create virtualenv for tests", command=['virtualenv', 'virt_test'], From pypy.commits at gmail.com Fri Nov 24 06:50:53 2017 From: pypy.commits at gmail.com (mattip) Date: Fri, 24 Nov 2017 03:50:53 -0800 (PST) Subject: [pypy-commit] buildbot default: update pip, setuptools Message-ID: <5a18079d.8dc1df0a.ef7c5.7337@mx.google.com> Author: Matti Picus Branch: Changeset: r1043:0548ff25f980 Date: 2017-11-24 13:50 +0200 http://bitbucket.org/pypy/buildbot/changeset/0548ff25f980/ Log: update pip, setuptools diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -572,7 +572,14 @@ )) self.addStep(ShellCmd( - description="install requirments to virtual environment", + description="update pip", + command=[self.virt_python, '-mpip', 'install', '--upgrade', + 'pip' , 'setuptools'], + haltOnFailure=True, + )) + + self.addStep(ShellCmd( + description="install requirements to virtual environment", command=[self.virt_python, '-mpip', 'install', '-r', 'requirements.txt'], haltOnFailure=True, From pypy.commits at gmail.com Fri Nov 24 08:00:47 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:00:47 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix more tests Message-ID: <5a1817ff.c23a1c0a.d3e0.7191@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93160:a9bb96fbf9d4 Date: 2017-11-24 13:53 +0100 http://bitbucket.org/pypy/pypy/changeset/a9bb96fbf9d4/ Log: fix more tests BUT: a slight pessimization, because object decoding becomes a little bit slower diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -247,10 +247,11 @@ self.pos = i+1 return self.space.newdict() - d = {} + # XXX this should be improved to use an unwrapped dict + w_dict = self.space.newdict() while True: # parse a key: value - name = self.decode_key(i) + w_name = self.decode_key(i) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] if ch != ':': @@ -259,13 
+260,13 @@ i = self.skip_whitespace(i) # w_value = self.decode_any(i) - d[name] = w_value + self.space.setitem(w_dict, w_name, w_value) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] i += 1 if ch == '}': self.pos = i - return self._create_dict(d) + return w_dict elif ch == ',': pass elif ch == '\0': @@ -274,10 +275,6 @@ self._raise("Unexpected '%s' when decoding object (char %d)", ch, i-1) - def _create_dict(self, d): - from pypy.objspace.std.dictmultiobject import from_unicode_key_dict - return from_unicode_key_dict(self.space, d) - def decode_string(self, i): start = i bits = 0 @@ -383,7 +380,7 @@ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00)) def decode_key(self, i): - """ returns an unwrapped unicode """ + """ returns a wrapped unicode """ from rpython.rlib.rarithmetic import intmask i = self.skip_whitespace(i) diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1257,12 +1257,6 @@ create_iterator_classes(UnicodeDictStrategy) -def from_unicode_key_dict(space, d): - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.erase(d) - return W_DictObject(space, strategy, storage) - - class IntDictStrategy(AbstractTypedStrategy, DictStrategy): erase, unerase = rerased.new_erasing_pair("int") erase = staticmethod(erase) From pypy.commits at gmail.com Fri Nov 24 08:00:45 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:00:45 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix unicode \-encoding in _pypyjson Message-ID: <5a1817fd.d7941c0a.d4557.3084@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93159:82223a975b6b Date: 2017-11-24 13:00 +0100 http://bitbucket.org/pypy/pypy/changeset/82223a975b6b/ Log: fix unicode \-encoding in _pypyjson diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -369,8 +369,7 @@ return # help the annotator to know that we'll never go beyond # this point # - uchr = runicode.code_to_unichr(val) # may be a surrogate pair again - utf8_ch = unicodehelper.encode_utf8(self.space, uchr) + utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) builder.append(utf8_ch) return i From pypy.commits at gmail.com Fri Nov 24 08:27:58 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 05:27:58 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: add todo Message-ID: <5a181e5e.caa2df0a.c6316.e3d8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93161:8dac9e38c3d5 Date: 2017-11-24 14:27 +0100 http://bitbucket.org/pypy/pypy/changeset/8dac9e38c3d5/ Log: add todo diff --git a/TODO b/TODO --- a/TODO +++ b/TODO @@ -9,3 +9,5 @@ * remove assertions from W_UnicodeObject.__init__ if all the builders pass * what to do with error handlers that go backwards. 
There were tests in test_codecs that would check for that + +* fix _pypyjson to not use a wrapped dict when decoding an object From pypy.commits at gmail.com Fri Nov 24 09:15:38 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 06:15:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: fix encoding to operate on utf-8 encoded strings Message-ID: <5a18298a.e1acdf0a.beeec.a3b8@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93163:5b81f483c459 Date: 2017-11-24 15:14 +0100 http://bitbucket.org/pypy/pypy/changeset/5b81f483c459/ Log: fix encoding to operate on utf-8 encoded strings diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,5 +1,5 @@ from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import str_decode_utf_8 +from rpython.rlib import rutf8 from pypy.interpreter import unicodehelper @@ -30,11 +30,8 @@ # the input is a string with only non-special ascii chars return w_string - eh = unicodehelper.decode_error_handler(space) - u = str_decode_utf_8( - s, len(s), None, final=True, errorhandler=eh, - allow_surrogates=True)[0] - sb = StringBuilder(len(u)) + unicodehelper.check_utf8_or_raise(space, s) + sb = StringBuilder(len(s)) sb.append_slice(s, 0, first) else: # We used to check if 'u' contains only safe characters, and return @@ -44,29 +41,31 @@ # a string (with the ascii encoding). This requires two passes # over the characters. So we may as well directly turn it into a # string here --- only one pass. - u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) + s = space.utf8_w(w_string) + sb = StringBuilder(len(s)) first = 0 - for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + it = rutf8.Utf8StringIterator(s) + for i in range(first): + it.next() + for c in it: + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) From pypy.commits at gmail.com Fri Nov 24 09:15:35 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 06:15:35 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: use an actual iterator, to make the code nicer (they work well in rpython nowadays) Message-ID: <5a182987.1cbf1c0a.deee6.0ee3@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93162:6a13aba253bd Date: 2017-11-24 15:07 +0100 http://bitbucket.org/pypy/pypy/changeset/6a13aba253bd/ Log: use an actual iterator, to make the code nicer (they work well in rpython nowadays) diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -702,10 +702,12 @@ self._end = len(utf8s) self._pos = 0 - def done(self): - return self._pos == self._end + def __iter__(self): + return self def next(self): + if self._pos 
== self._end: + raise StopIteration ret = codepoint_at_pos(self._utf8, self._pos) self._pos = next_codepoint_pos(self._utf8, self._pos) return ret diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -188,6 +188,6 @@ def test_utf8_iterator(arg): u = rutf8.Utf8StringIterator(arg.encode('utf8')) l = [] - while not u.done(): - l.append(unichr(u.next())) + for c in u: + l.append(unichr(c)) assert list(arg) == l From pypy.commits at gmail.com Fri Nov 24 10:13:23 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:13:23 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: support for append_utf8 Message-ID: <5a183713.c78c1c0a.ebbca.0ce7@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93164:f5be33826726 Date: 2017-11-24 16:10 +0100 http://bitbucket.org/pypy/pypy/changeset/f5be33826726/ Log: support for append_utf8 diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -687,6 +687,11 @@ self._lgt += 1 unichr_as_utf8_append(self._s, code, True) + def append_utf8(self, utf8, length, flag): + self._flag = combine_flags(self._flag, flag) + self._lgt += length + self._s.append(utf8) + def build(self): return self._s.build() diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -175,6 +175,7 @@ assert s.get_flag() == rutf8.FLAG_REGULAR assert s.get_length() == 9 assert s.build().decode("utf8") == u"foox\u1234foox" + s = rutf8.Utf8StringBuilder() s.append_code(0x1234) assert s.build().decode("utf8") == u"\u1234" @@ -184,6 +185,21 @@ assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES assert s.get_length() == 2 + s = rutf8.Utf8StringBuilder() + s.append_utf8("abc", 3, rutf8.FLAG_ASCII) + assert s.get_flag() == rutf8.FLAG_ASCII + assert s.get_length() == 1 + assert s.build().decode("utf8") == u"abc" + + s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR) + assert s.build().decode("utf8") == u"abc\u1234" + assert s.get_flag() == rutf8.FLAG_REGULAR + assert s.get_length() == 4 + + s.append_code(0xD800) + assert s.get_flag() == rutf8.FLAG_HAS_SURROGATES + assert s.get_length() == 5 + @given(strategies.text()) def test_utf8_iterator(arg): u = rutf8.Utf8StringIterator(arg.encode('utf8')) From pypy.commits at gmail.com Fri Nov 24 10:13:25 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:13:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: replace a lot of uses of StringBuilder by Utf8StringBuilder Message-ID: <5a183715.3799df0a.a1cf0.9898@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93165:48da1a44d860 Date: 2017-11-24 16:12 +0100 http://bitbucket.org/pypy/pypy/changeset/48da1a44d860/ Log: replace a lot of uses of StringBuilder by Utf8StringBuilder diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -64,6 +64,11 @@ # - malloced object, which means it has index, then # _index_storage.flags determines the kind + @staticmethod + def from_utf8builder(builder): + return W_UnicodeObject( + builder.build(), builder.get_length(), builder.get_flag()) + def __repr__(self): """representation for debugging purposes""" return "%s(%r)" % (self.__class__.__name__, self._utf8) @@ -344,57 +349,38 @@ return 
mod_format(space, w_values, self, do_unicode=True) def descr_swapcase(self, space): - selfvalue = self._utf8 - builder = StringBuilder(len(selfvalue)) - flag = self._get_flag() - i = 0 - while i < len(selfvalue): - ch = rutf8.codepoint_at_pos(selfvalue, i) - i = rutf8.next_codepoint_pos(selfvalue, i) + input = self._utf8 + builder = rutf8.Utf8StringBuilder(len(input)) + for ch in rutf8.Utf8StringIterator(input): if unicodedb.isupper(ch): ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder.append_code(ch) + return self.from_utf8builder(builder) def descr_title(self, space): if len(self._utf8) == 0: return self - utf8, flag = self.title_unicode(self._utf8) - return W_UnicodeObject(utf8, self._len(), flag) + return self.title_unicode(self._utf8) @jit.elidable def title_unicode(self, value): input = self._utf8 - builder = StringBuilder(len(input)) - i = 0 + builder = rutf8.Utf8StringBuilder(len(input)) previous_is_cased = False - flag = self._get_flag() - while i < len(input): - ch = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + for ch in rutf8.Utf8StringIterator(input): if not previous_is_cased: ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) + builder.append_code(ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build(), flag + return self.from_utf8builder(builder) def descr_translate(self, space, w_table): - input = self._utf8 - result = StringBuilder(len(input)) - result_length = 0 - flag = self._get_flag() - i = 0 - while i < len(input): - codepoint = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for codepoint in rutf8.Utf8StringIterator(self._utf8): try: w_newval = space.getitem(w_table, space.newint(codepoint)) except OperationError as e: @@ -406,24 +392,19 @@ elif space.isinstance_w(w_newval, space.w_int): codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): - result.append(w_newval._utf8) - flag = rutf8.combine_flags(flag, w_newval._get_flag()) - result_length += w_newval._length + builder.append_utf8( + w_newval._utf8, w_newval._length, w_newval._get_flag()) continue else: raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") try: - if codepoint >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(result, codepoint, - allow_surrogates=True) - result_length += 1 + builder.append_code(codepoint) except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length, flag) + return self.from_utf8builder(builder) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -534,16 +515,11 @@ return tformat.formatter_field_name_split() def descr_lower(self, space): - builder = StringBuilder(len(self._utf8)) - pos = 0 - flag = self._get_flag() - while pos < len(self._utf8): - lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) - if lower >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - 
rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) - pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + lower = unicodedb.tolower(ch) + builder.append_code(lower) + return self.from_utf8builder(builder) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -711,18 +687,11 @@ return space.newlist(strs_w) def descr_upper(self, space): - value = self._utf8 - builder = StringBuilder(len(value)) - flag = self._get_flag() - i = 0 - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - uchar = unicodedb.toupper(uchar) - if uchar >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + ch = unicodedb.toupper(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int) def descr_zfill(self, space, width): @@ -826,22 +795,15 @@ if len(value) == 0: return self._empty() - flag = self._get_flag() - builder = StringBuilder(len(value)) - uchar = rutf8.codepoint_at_pos(value, 0) - i = rutf8.next_codepoint_pos(value, 0) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + it = rutf8.Utf8StringIterator(self._utf8) + uchar = it.next() ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - i = rutf8.next_codepoint_pos(value, i) - ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder.append_code(ch) + for ch in it: + ch = unicodedb.tolower(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): From pypy.commits at gmail.com Fri Nov 24 10:24:37 2017 From: pypy.commits at gmail.com (cfbolz) Date: Fri, 24 Nov 2017 07:24:37 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: small cleanup of copy-pasted join code Message-ID: <5a1839b5.e6361c0a.e0caa.c69f@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: unicode-utf8 Changeset: r93166:f5a5189e5314 Date: 2017-11-24 16:24 +0100 http://bitbucket.org/pypy/pypy/changeset/f5a5189e5314/ Log: small cleanup of copy-pasted join code diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -498,12 +498,6 @@ def _join_return_one(self, space, w_obj): return space.is_w(space.type(w_obj), space.w_unicode) - def _join_check_item(self, space, w_obj): - if (space.isinstance_w(w_obj, space.w_bytes) or - space.isinstance_w(w_obj, space.w_unicode)): - return 0 - return 1 - def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter tformat = unicode_template_formatter(space, space.utf8_w(self)) @@ -633,13 +627,11 @@ flag = self._get_flag() for i in range(size): w_s = list_w[i] - check_item = 
self._join_check_item(space, w_s) - if check_item == 1: + if not (space.isinstance_w(w_s, space.w_bytes) or + space.isinstance_w(w_s, space.w_unicode)): raise oefmt(space.w_TypeError, - "sequence item %d: expected string, %T found", + "sequence item %d: expected string or unicode, %T found", i, w_s) - elif check_item == 2: - return self._join_autoconvert(space, list_w) # XXX Maybe the extra copy here is okay? It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) From pypy.commits at gmail.com Fri Nov 24 12:22:06 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 09:22:06 -0800 (PST) Subject: [pypy-commit] pypy default: Keep chipping away at readline_w() Message-ID: <5a18553e.759adf0a.6067e.67fc@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93167:2477eb379774 Date: 2017-11-24 17:20 +0000 http://bitbucket.org/pypy/pypy/changeset/2477eb379774/ Log: Keep chipping away at readline_w() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,44 +214,53 @@ def newlines_get_w(self, space): return space.w_None + def _find_newline_universal(self, line, start, end): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + i = start + while i < end: + ch = line[i] + i += 1 + if ch == '\n': + return i + if ch == '\r': + if start + i >= end: + return i + if line[i] == '\n': + return i + 1 + else: + return i + return -1 + + def _find_marker(self, marker, line, start, end): + for i in range(start, end - len(marker) + 1): + ch = line[i] + if ch == marker[0]: + for j in range(1, len(marker)): + if line[i + j] != marker[j]: + break + else: + return i + len(marker) + return -1 + def _find_line_ending(self, line, start, end): - size = end - start if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 + i = self._find_newline_universal(line, start, end) + if i < 0: + return i, end + else: + return i, 0 if self.readtranslate: # Newlines are already translated, only search for \n newline = u'\n' else: # Non-universal mode. 
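# Editor's sketch, not part of the changeset above: the (pos, scanned) pair
# that _find_line_ending hands back to readline_w.  On a hit it returns the
# index just past the newline marker together with 0; on a miss it returns -1
# together with how many characters may safely be consumed now, holding back
# the last len(marker) - 1 characters in case the marker straddles two decoded
# chunks.  The helper below is a simplified stand-in, not the RPython code.
def find_line_ending(marker, line, start, end):
    pos = line.find(marker, start, end)
    if pos >= 0:
        return pos + len(marker), 0
    return -1, max(start, end - len(marker) + 1)

assert find_line_ending(u'\r\n', u'ab\r\ncd', 0, 6) == (4, 0)
assert find_line_ending(u'\r\n', u'abc\r', 0, 4) == (-1, 3)  # keep the '\r'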
newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - + i = self._find_marker(newline, line, start, end) + if i < 0: + return i, end - len(newline) + 1 + else: + return i, 0 W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -654,7 +663,7 @@ limit = convert_size(space, w_limit) line = None - remaining = None + remnant = None builder = UnicodeBuilder() while True: @@ -665,44 +674,43 @@ start = endpos = offset_to_buffer = 0 break - if not remaining: + if not remnant: line = self.decoded_chars start = self.decoded_chars_used offset_to_buffer = 0 else: assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars + line = remnant + self.decoded_chars start = 0 - offset_to_buffer = len(remaining) - remaining = None + offset_to_buffer = len(remnant) + remnant = None line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) + endpos, end_scan = self._find_line_ending(line, start, line_len) chunked = builder.getlength() if endpos >= 0: if limit >= 0 and endpos >= start + limit - chunked: endpos = start + limit - chunked assert endpos >= 0 break - assert consumed >= 0 + assert end_scan >= 0 - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: + # We can put aside up to `end_scan` + if limit >= 0 and end_scan >= limit - chunked: # Didn't find line ending, but reached length limit endpos = start + limit - chunked assert endpos >= 0 break # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] + if end_scan > start: + s = line[start:end_scan] builder.append(s) - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] + if end_scan < line_len: + remnant = line[end_scan:] line = None # We have consumed the buffer self._unset_decoded() @@ -715,8 +723,8 @@ if start > 0 or endpos < len(line): line = line[start:endpos] builder.append(line) - elif remaining: - builder.append(remaining) + elif remnant: + builder.append(remnant) result = builder.build() return space.newunicode(result) From pypy.commits at gmail.com Fri Nov 24 14:45:56 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 11:45:56 -0800 (PST) Subject: [pypy-commit] pypy default: More refactoring: deal with the remnant more explicitly and handle size limit inside _find_line_ending() Message-ID: <5a1876f4.0b0f1c0a.ac5e3.0fde@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93168:189c2cce360e Date: 2017-11-24 19:43 +0000 http://bitbucket.org/pypy/pypy/changeset/189c2cce360e/ Log: More refactoring: deal with the remnant more explicitly and handle size limit inside _find_line_ending() diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,27 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', 
'\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -174,18 +174,16 @@ start = self.pos if limit < 0 or limit > len(self.buf) - self.pos: limit = len(self.buf) - self.pos - assert limit >= 0 - end = start + limit endpos, consumed = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, - end + limit ) if endpos < 0: - endpos = end + endpos = start + limit assert endpos >= 0 self.pos = endpos return space.newunicode(u"".join(self.buf[start:endpos])) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,53 +214,49 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, end): + def _find_newline_universal(self, line, start, limit): # Universal newline search. Find any of \r, \r\n, \n # The decoder ensures that \r\n are not split in two pieces + limit = min(limit, len(line) - start) + end = start + limit i = start while i < end: ch = line[i] i += 1 if ch == '\n': - return i + return i, 0 if ch == '\r': - if start + i >= end: - return i + if i >= end: + break if line[i] == '\n': - return i + 1 + return i + 1, 0 else: - return i - return -1 + return i, 0 + return -1, end - def _find_marker(self, marker, line, start, end): + def _find_marker(self, marker, line, start, limit): + limit = min(limit, len(line) - start) + end = start + limit for i in range(start, end - len(marker) + 1): ch = line[i] if ch == marker[0]: for j in range(1, len(marker)): if line[i + j] != marker[j]: - break + break # from inner loop else: - return i + len(marker) - return -1 + return i + len(marker), 0 + return -1, end - len(marker) + 1 - def _find_line_ending(self, line, start, end): + def _find_line_ending(self, line, start, limit): if self.readuniversal: - i = self._find_newline_universal(line, start, end) - if i < 0: - return i, end - else: - return i, 0 + return self._find_newline_universal(line, start, limit) if self.readtranslate: # Newlines are already translated, only search for \n newline = u'\n' else: # Non-universal mode. 
newline = self.readnl - i = self._find_marker(newline, line, start, end) - if i < 0: - return i, end - len(newline) + 1 - else: - return i, 0 + return self._find_marker(newline, line, start, limit) W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -671,35 +667,42 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + start = endpos = 0 break - if not remnant: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 - else: + if remnant: + assert not self.readtranslate and self.readnl == u'\r\n' assert self.decoded_chars_used == 0 - line = remnant + self.decoded_chars - start = 0 - offset_to_buffer = len(remnant) - remnant = None + if remnant == u'\r' and self.decoded_chars[0] == u'\n': + builder.append(u'\r\n') + self.decoded_chars_used = 1 + line = remnant = None + start = endpos = 0 + break + else: + builder.append(remnant) + remnant = None + continue + + line = self.decoded_chars + start = self.decoded_chars_used line_len = len(line) - endpos, end_scan = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 + else: + remaining = sys.maxint + endpos, end_scan = self._find_line_ending(line, start, remaining) + if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 break + assert end_scan >= 0 - # We can put aside up to `end_scan` - if limit >= 0 and end_scan >= limit - chunked: + if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + endpos = end_scan break # No line ending seen yet - put aside current data @@ -709,7 +712,7 @@ # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < line_len: + if end_scan < len(line): remnant = line[end_scan:] line = None # We have consumed the buffer @@ -717,9 +720,7 @@ if line: # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used + self.decoded_chars_used = endpos if start > 0 or endpos < len(line): line = line[start:endpos] builder.append(line) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,33 @@ +from hypothesis import given, strategies as st, assume +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break 
+ assert u''.join(lines) == txt From pypy.commits at gmail.com Fri Nov 24 15:20:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 12:20:42 -0800 (PST) Subject: [pypy-commit] pypy default: Replace (pos-if-found, pos-if-not-found) tuple with (position, found) Message-ID: <5a187f1a.4a981c0a.197b.5ae6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93169:9c9233da7cc4 Date: 2017-11-24 20:18 +0000 http://bitbucket.org/pypy/pypy/changeset/9c9233da7cc4/ Log: Replace (pos-if-found, pos-if-not-found) tuple with (position, found) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -176,13 +176,13 @@ limit = len(self.buf) - self.pos assert limit >= 0 - endpos, consumed = self._find_line_ending( + endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, limit ) - if endpos < 0: + if not found: endpos = start + limit assert endpos >= 0 self.pos = endpos diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -224,15 +224,15 @@ ch = line[i] i += 1 if ch == '\n': - return i, 0 + return i, True if ch == '\r': if i >= end: break if line[i] == '\n': - return i + 1, 0 + return i + 1, True else: - return i, 0 - return -1, end + return i, True + return end, False def _find_marker(self, marker, line, start, limit): limit = min(limit, len(line) - start) @@ -244,8 +244,8 @@ if line[i + j] != marker[j]: break # from inner loop else: - return i + len(marker), 0 - return -1, end - len(marker) + 1 + return i + len(marker), True + return end - len(marker) + 1, False def _find_line_ending(self, line, start, limit): if self.readuniversal: @@ -667,7 +667,7 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = 0 + start = end_scan = 0 break if remnant: @@ -677,7 +677,7 @@ builder.append(u'\r\n') self.decoded_chars_used = 1 line = remnant = None - start = endpos = 0 + start = end_scan = 0 break else: builder.append(remnant) @@ -686,23 +686,18 @@ line = self.decoded_chars start = self.decoded_chars_used - - line_len = len(line) if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: remaining = sys.maxint - endpos, end_scan = self._find_line_ending(line, start, remaining) - - if endpos >= 0: + end_scan, found = self._find_line_ending(line, start, remaining) + assert end_scan >= 0 + if found: break - assert end_scan >= 0 - # We can put aside up to `end_scan` if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = end_scan break # No line ending seen yet - put aside current data @@ -720,9 +715,9 @@ if line: # Our line ends in the current buffer - self.decoded_chars_used = endpos - if start > 0 or endpos < len(line): - line = line[start:endpos] + self.decoded_chars_used = end_scan + if start > 0 or end_scan < len(line): + line = line[start:end_scan] builder.append(line) elif remnant: builder.append(remnant) From pypy.commits at gmail.com Fri Nov 24 15:26:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 12:26:03 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a18805b.42e61c0a.73c2e.5f8f@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93170:f9a1926628b2 Date: 2017-11-24 20:22 +0000 
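The hypothesis round-trip tests above exercise exactly the newline modes the refactored readline code has to preserve, and those modes are easy to sanity-check by hand against CPython's io module. A minimal standalone check, standard library only, nothing pypy-specific:

import io

# newline=None: universal newlines, \r and \r\n are translated to \n on read.
f = io.TextIOWrapper(io.BytesIO(b'one\ntwo\r\nthree\rfour'),
                     encoding='utf-8', newline=None)
assert [f.readline() for _ in range(4)] == ['one\n', 'two\n', 'three\n', 'four']

# newline='\r\n': only \r\n terminates a line, and nothing is translated.
f = io.TextIOWrapper(io.BytesIO(b'one\ntwo\r\nthree'),
                     encoding='utf-8', newline='\r\n')
assert f.readline() == 'one\ntwo\r\n'
assert f.readline() == 'three'

# A size argument caps the number of characters returned.
f = io.TextIOWrapper(io.BytesIO(b'abcdef\n'), encoding='utf-8', newline='')
assert f.readline(3) == 'abc'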
http://bitbucket.org/pypy/pypy/changeset/f9a1926628b2/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,27 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -174,18 +174,16 @@ start = self.pos if limit < 0 or limit > len(self.buf) - self.pos: limit = len(self.buf) - self.pos + assert limit >= 0 - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( + endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. u"".join(self.buf), start, - end + limit ) - if endpos < 0: - endpos = end + if not found: + endpos = start + limit assert endpos >= 0 self.pos = endpos return space.newunicode(u"".join(self.buf[start:endpos])) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,44 +221,49 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start + def _find_newline_universal(self, line, start, limit): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + limit = min(limit, len(line) - start) + end = start + limit + i = start + while i < end: + ch = line[i] + i += 1 + if ch == '\n': + return i, True + if ch == '\r': + if i >= end: + break + if line[i] == '\n': + return i + 1, True + else: + return i, True + return end, False + + def _find_marker(self, marker, line, start, limit): + limit = min(limit, len(line) - start) + end = start + limit + for i in range(start, end - len(marker) + 1): + ch = line[i] + if ch == marker[0]: + for j in range(1, len(marker)): + if line[i + j] != marker[j]: + break # from inner loop + else: + return i + len(marker), True + return end - len(marker) + 1, False + + def _find_line_ending(self, line, start, limit): if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. 
- while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 + return self._find_newline_universal(line, start, limit) if self.readtranslate: # Newlines are already translated, only search for \n newline = '\n' else: # Non-universal mode. newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - + return self._find_marker(newline, line, start, limit) W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, @@ -661,7 +666,7 @@ limit = convert_size(space, w_limit) line = None - remaining = None + remnant = None builder = StringBuilder() while True: @@ -669,61 +674,60 @@ has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + start = end_scan = 0 break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == '\r\n' + assert self.decoded_chars_used == 0 + if remnant == '\r' and self.decoded_chars[0] == '\n': + builder.append('\r\n') + self.decoded_chars_used = 1 + line = remnant = None + start = end_scan = 0 + break + else: + builder.append(remnant) + remnant = None + continue + + line = self.decoded_chars + start = self.decoded_chars_used + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = sys.maxint + end_scan, found = self._find_line_ending(line, start, remaining) + assert end_scan >= 0 + if found: + break - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: + if limit >= 0 and end_scan - start >= remaining: # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 break # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] + if end_scan > start: + s = line[start:end_scan] builder.append(s) - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] + if end_scan < len(line): + remnant = line[end_scan:] line = None # We have consumed the buffer self._unset_decoded() if line: # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] + self.decoded_chars_used = end_scan + if start > 0 or end_scan < len(line): + line = line[start:end_scan] builder.append(line) - elif remaining: - builder.append(remaining) + elif remnant: + builder.append(remnant) result = builder.build() return space.new_from_utf8(result) diff --git 
a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,33 @@ +from hypothesis import given, strategies as st, assume +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -9,6 +9,7 @@ from rpython.rtyper.tool import rffi_platform as platform from rpython.rlib import rthread, jit from rpython.rlib.objectmodel import we_are_translated +from rpython.config.translationoption import get_translation_config class VMProfPlatformUnsupported(Exception): pass @@ -133,11 +134,17 @@ #endif """]) +if get_translation_config() is None: + # tests need the full eci here + _eci = global_eci +else: + _eci = auto_eci + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=auto_eci, + rffi.INT, compilation_info=_eci, _nowrapper=True) vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=auto_eci, + lltype.Void, compilation_info=_eci, _nowrapper=True) From pypy.commits at gmail.com Fri Nov 24 19:57:47 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 16:57:47 -0800 (PST) Subject: [pypy-commit] pypy default: Specify the encoding, for systems where utf-8 isn't the default Message-ID: <5a18c00b.cc87df0a.b36f6.6725@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93171:9b3b4676e3b7 Date: 2017-11-25 00:55 +0000 http://bitbucket.org/pypy/pypy/changeset/9b3b4676e3b7/ Log: Specify the encoding, for systems where utf-8 isn't the default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -14,7 +14,8 @@ mode=st.sampled_from(['\r', '\n', '\r\n', '']), limit=st.integers(min_value=-1)) def test_readline(txt, mode, limit): - textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) lines = [] while True: line = textio.readline(limit) From pypy.commits at gmail.com Fri Nov 24 21:31:05 2017 From: pypy.commits at 
gmail.com (rlamy) Date: Fri, 24 Nov 2017 18:31:05 -0800 (PST) Subject: [pypy-commit] pypy default: Extract UnicodeIO object from W_StringIO Message-ID: <5a18d5e9.c9b81c0a.abdfc.5dc5@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93173:1d90f3200c9c Date: 2017-11-25 02:29 +0000 http://bitbucket.org/pypy/pypy/changeset/1d90f3200c9c/ Log: Extract UnicodeIO object from W_StringIO diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,65 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +71,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +94,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +102,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +113,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or 
space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,86 +150,56 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, 
space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) - if self.pos >= len(self.buf): + if self.buf.pos >= len(self.buf.data): return space.newunicode(u"") - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos + start = self.buf.pos + if limit < 0 or limit > len(self.buf.data) - self.buf.pos: + limit = len(self.buf.data) - self.buf.pos assert limit >= 0 endpos, found = self._find_line_ending( # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf), + u"".join(self.buf.data), start, limit ) if not found: endpos = start + limit assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) + self.buf.pos = endpos + return space.newunicode(u"".join(self.buf.data[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -202,32 +215,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) From pypy.commits at gmail.com Fri Nov 24 21:31:03 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 18:31:03 -0800 (PST) Subject: [pypy-commit] pypy default: Extract DecodeBuffer object from W_TextIOWrapper Message-ID: <5a18d5e7.d31b1c0a.6e981.26d4@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93172:e1dbf4f46c45 Date: 2017-11-25 01:15 +0000 http://bitbucket.org/pypy/pypy/changeset/e1dbf4f46c45/ Log: Extract DecodeBuffer object from W_TextIOWrapper diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -333,6 +333,45 @@ self.input = input +class DecodeBuffer(object): + def __init__(self): + self.text = None + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and self.pos < len(self.text)) + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -346,8 +385,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = 
DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -515,44 +553,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.unicode_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return u"" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -572,7 +576,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -584,7 +588,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -597,10 +601,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -633,7 +637,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.newunicode(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -645,7 +649,7 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) @@ -672,10 +676,10 @@ if remnant: assert not self.readtranslate and self.readnl == u'\r\n' - assert self.decoded_chars_used == 0 - if remnant == u'\r' and self.decoded_chars[0] == u'\n': + assert self.decoded.pos == 0 + if remnant == u'\r' and self.decoded.text[0] == u'\n': builder.append(u'\r\n') - self.decoded_chars_used = 1 + self.decoded.pos = 1 line = remnant = None start = end_scan = 0 break @@ -684,8 +688,8 @@ remnant = None continue - line = self.decoded_chars - start = self.decoded_chars_used + line = self.decoded.text + start = self.decoded.pos if limit > 0: remaining = limit - builder.getlength() assert remaining 
>= 0 @@ -711,11 +715,11 @@ remnant = line[end_scan:] line = None # We have consumed the buffer - self._unset_decoded() + self.decoded.reset() if line: # Our line ends in the current buffer - self.decoded_chars_used = end_scan + self.decoded.pos = end_scan if start > 0 or end_scan < len(line): line = line[start:end_scan] builder.append(line) @@ -855,7 +859,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -880,7 +884,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -901,13 +905,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -933,7 +937,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -944,11 +948,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. From pypy.commits at gmail.com Fri Nov 24 22:49:00 2017 From: pypy.commits at gmail.com (rlamy) Date: Fri, 24 Nov 2017 19:49:00 -0800 (PST) Subject: [pypy-commit] pypy default: Add readline() and readline_universal() methods to UnicodeIO, and stop sharing the implementation with textio Message-ID: <5a18e82c.8faedf0a.ec3e7.9c84@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93174:82244130bf34 Date: 2017-11-25 03:46 +0000 http://bitbucket.org/pypy/pypy/changeset/82244130bf34/ Log: Add readline() and readline_universal() methods to UnicodeIO, and stop sharing the implementation with textio diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -32,6 +32,56 @@ self.pos = end return u''.join(self.data[start:end]) + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + def write(self, string): length = len(string) if self.pos + length > len(self.data): @@ -180,26 +230,17 @@ def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.buf.pos >= len(self.buf.data): - return space.newunicode(u"") - - start = self.buf.pos - if limit < 0 or limit > len(self.buf.data) - self.buf.pos: - limit = len(self.buf.data) - self.buf.pos - assert limit >= 0 - - endpos, found = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf.data), - start, - limit - ) - if not found: - endpos = start + limit - assert endpos >= 0 - self.buf.pos = endpos - return space.newunicode(u"".join(self.buf.data[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): From pypy.commits at gmail.com Sat Nov 25 12:55:48 2017 From: pypy.commits at gmail.com (mattip) Date: Sat, 25 Nov 2017 09:55:48 -0800 (PST) Subject: [pypy-commit] buildbot default: cleanup, add ensurepip step for TranslatedTests (downloading only) builders (ARM) Message-ID: <5a19aea4.32acdf0a.683e1.26ec@mx.google.com> Author: Matti Picus Branch: Changeset: r1044:a64690c374cf Date: 2017-11-25 19:55 +0200 http://bitbucket.org/pypy/buildbot/changeset/a64690c374cf/ Log: cleanup, add ensurepip step for TranslatedTests (downloading only) builders (ARM) builders that translate call ensurepip as part of translation and in packaging diff --git a/bot2/pypybuildbot/builds.py b/bot2/pypybuildbot/builds.py --- a/bot2/pypybuildbot/builds.py +++ b/bot2/pypybuildbot/builds.py @@ -475,6 +475,10 @@ clean = 'rm -rf pypy-venv' target = Property('target_path') factory.addStep(ShellCmd( + description="ensurepip", + command=prefix + [target, '-mensurepip'], + flunkOnFailure=True)) + factory.addStep(ShellCmd( description="clean old virtualenv", command=clean, workdir='venv', @@ -750,12 +754,6 @@ haltOnFailure=True, workdir='.')) self.addStep(ShellCmd( - description="copy ctypes resource cache", - # eventually remove this step, not needed after 5.1 - command=['cp', '-rv', 'pypy-c/lib_pypy/ctypes_config_cache', 'build/lib_pypy'], - haltOnFailure=False, - workdir='.')) - self.addStep(ShellCmd( description="copy cffi import libraries", command='cp -rv pypy-c/lib_pypy/*.so build/lib_pypy', haltOnFailure=True, From pypy.commits at gmail.com Sat Nov 25 20:24:27 2017 From: pypy.commits at gmail.com 
(rlamy) Date: Sat, 25 Nov 2017 17:24:27 -0800 (PST) Subject: [pypy-commit] pypy default: Add some tests for DecodeBuffer Message-ID: <5a1a17cb.c4c21c0a.f510f.f2b6@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93176:e8e611955c9a Date: 2017-11-26 01:22 +0000 http://bitbucket.org/pypy/pypy/changeset/e8e611955c9a/ Log: Add some tests for DecodeBuffer diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -290,8 +290,8 @@ class DecodeBuffer(object): - def __init__(self): - self.text = None + def __init__(self, text=None): + self.text = text self.pos = 0 def set(self, space, w_decoded): diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,6 +1,10 @@ -from hypothesis import given, strategies as st, assume +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") from pypy.module._io.interp_bytesio import W_BytesIO -from pypy.module._io.interp_textio import W_TextIOWrapper +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer LINESEP = ['', '\r', '\n', '\r\n'] @@ -31,3 +35,34 @@ else: break assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 20:24:24 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 17:24:24 -0800 (PST) Subject: [pypy-commit] pypy default: Refactor readline_w() and move most of the logic to DecodeBuffer Message-ID: <5a1a17c8.cb3a1c0a.79405.4011@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93175:65f3ab0d10e3 Date: 2017-11-26 00:54 +0000 http://bitbucket.org/pypy/pypy/changeset/65f3ab0d10e3/ Log: Refactor readline_w() and move most of the logic to DecodeBuffer diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,50 +214,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, limit): - # Universal newline search. 
Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - limit = min(limit, len(line) - start) - end = start + limit - i = start - while i < end: - ch = line[i] - i += 1 - if ch == '\n': - return i, True - if ch == '\r': - if i >= end: - break - if line[i] == '\n': - return i + 1, True - else: - return i, True - return end, False - - def _find_marker(self, marker, line, start, limit): - limit = min(limit, len(line) - start) - end = start + limit - for i in range(start, end - len(marker) + 1): - ch = line[i] - if ch == marker[0]: - for j in range(1, len(marker)): - if line[i + j] != marker[j]: - break # from inner loop - else: - return i + len(marker), True - return end - len(marker) + 1, False - - def _find_line_ending(self, line, start, limit): - if self.readuniversal: - return self._find_newline_universal(line, start, limit) - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = u'\n' - else: - # Non-universal mode. - newline = self.readnl - return self._find_marker(newline, line, start, limit) - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -369,7 +325,88 @@ return chars def has_data(self): - return (self.text is not None and self.pos < len(self.text)) + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False def check_decoded(space, w_decoded): @@ -655,23 +692,36 @@ return space.newunicode(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
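The subtlest part of the DecodeBuffer scanners added above is a '\r' that lands exactly at the end of a decoded chunk: find_crlf() pushes it back (pos -= 1) and reports no match, so the next chunk still has a chance to complete the '\r\n'. A stripped-down standalone model of just that behaviour, with illustrative names and no limit handling, not the module's actual class:

class Scanner(object):
    def __init__(self, text):
        self.text = text
        self.pos = 0

    def next_char(self):
        if self.pos >= len(self.text):
            raise StopIteration
        ch = self.text[self.pos]
        self.pos += 1
        return ch

    def peek_char(self):
        if self.pos >= len(self.text):
            raise StopIteration
        return self.text[self.pos]

    def find_crlf(self):
        # Look for a full '\r\n'; a lone trailing '\r' is pushed back.
        while True:
            try:
                ch = self.next_char()
            except StopIteration:
                return False
            if ch == u'\r':
                try:
                    if self.peek_char() == u'\n':
                        self.next_char()
                        return True
                except StopIteration:
                    self.pos -= 1     # keep the '\r' for the next chunk
                    return False

buf = Scanner(u'abc\r')
assert buf.find_crlf() is False
assert buf.text[buf.pos:] == u'\r'    # the trailing '\r' was not consumed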
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None remnant = None builder = UnicodeBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = end_scan = 0 + if remnant: + builder.append(remnant) break if remnant: @@ -680,52 +730,36 @@ if remnant == u'\r' and self.decoded.text[0] == u'\n': builder.append(u'\r\n') self.decoded.pos = 1 - line = remnant = None - start = end_scan = 0 + remnant = None break else: builder.append(remnant) remnant = None continue - line = self.decoded.text - start = self.decoded.pos if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: - remaining = sys.maxint - end_scan, found = self._find_line_ending(line, start, remaining) - assert end_scan >= 0 - if found: + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) + + if found or (limit >= 0 and builder.getlength() >= limit): break - if limit >= 0 and end_scan - start >= remaining: - # Didn't find line ending, but reached length limit - break - - # No line ending seen yet - put aside current data - if end_scan > start: - s = line[start:end_scan] - builder.append(s) - # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < len(line): - remnant = line[end_scan:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer self.decoded.reset() - if line: - # Our line ends in the current buffer - self.decoded.pos = end_scan - if start > 0 or end_scan < len(line): - line = line[start:end_scan] - builder.append(line) - elif remnant: - builder.append(remnant) - result = builder.build() return space.newunicode(result) From pypy.commits at gmail.com Sat Nov 25 20:29:38 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 17:29:38 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: hg merge default Message-ID: <5a1a1902.5a86df0a.3270c.5cd2@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93177:a40f7eee2bcf Date: 2017-11-26 01:27 +0000 http://bitbucket.org/pypy/pypy/changeset/a40f7eee2bcf/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -14,7 +14,8 @@ mode=st.sampled_from(['\r', '\n', '\r\n', '']), limit=st.integers(min_value=-1)) def test_readline(txt, mode, limit): - textio = TextIOWrapper(BytesIO(txt.encode('utf-8')), newline=mode) + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) lines = [] while True: line = textio.readline(limit) diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + 
W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): 
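Since the UnicodeIO helper shown in the diff above has no interpreter-level dependencies, its behaviour is easy to poke at directly. Illustrative only: this assumes a pypy source checkout on sys.path; pasting the class straight out of the diff into a plain Python session works just as well.

from pypy.module._io.interp_stringio import UnicodeIO

buf = UnicodeIO()
buf.write(u'one\ntwo\r\nthree')
buf.seek(0)
# readline_universal() stops at any of \r, \r\n, \n and does not translate.
assert buf.readline_universal(-1) == u'one\n'
assert buf.readline_universal(-1) == u'two\r\n'
assert buf.read(-1) == u'three'
assert buf.getvalue() == u'one\ntwo\r\nthree'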
@@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. 
space.call_method(self.w_dict, "update", w_dict) @@ -107,86 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - assert limit >= 0 - - endpos, found = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. - u"".join(self.buf), - start, - limit - ) - if not found: - endpos = start + limit - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -202,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. 
if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,50 +221,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_newline_universal(self, line, start, limit): - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - limit = min(limit, len(line) - start) - end = start + limit - i = start - while i < end: - ch = line[i] - i += 1 - if ch == '\n': - return i, True - if ch == '\r': - if i >= end: - break - if line[i] == '\n': - return i + 1, True - else: - return i, True - return end, False - - def _find_marker(self, marker, line, start, limit): - limit = min(limit, len(line) - start) - end = start + limit - for i in range(start, end - len(marker) + 1): - ch = line[i] - if ch == marker[0]: - for j in range(1, len(marker)): - if line[i + j] != marker[j]: - break # from inner loop - else: - return i + len(marker), True - return end - len(marker) + 1, False - - def _find_line_ending(self, line, start, limit): - if self.readuniversal: - return self._find_newline_universal(line, start, limit) - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = '\n' - else: - # Non-universal mode. - newline = self.readnl - return self._find_marker(newline, line, start, limit) - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -340,6 +296,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -353,8 +429,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -522,44 +597,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.utf8_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return "" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -579,7 +620,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -591,7 +632,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -604,10 +645,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -640,7 +681,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -652,82 +693,79 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) return space.new_from_utf8(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None remnant = None builder = StringBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = end_scan = 0 + if remnant: + builder.append(remnant) break if remnant: assert not self.readtranslate and self.readnl == '\r\n' - assert self.decoded_chars_used == 0 - if remnant == '\r' and self.decoded_chars[0] == '\n': + assert self.decoded.pos == 0 + if remnant == '\r' and self.decoded.text[0] == '\n': builder.append('\r\n') - self.decoded_chars_used = 1 - line = remnant = None - start = end_scan = 0 + self.decoded.pos = 1 + remnant = None break else: builder.append(remnant) remnant = None continue - line = self.decoded_chars - start = self.decoded_chars_used if limit > 0: remaining = limit - builder.getlength() assert remaining >= 0 else: - remaining = sys.maxint - end_scan, found = self._find_line_ending(line, start, remaining) - assert end_scan >= 0 - if found: + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) + + if found or (limit >= 0 and builder.getlength() >= limit): break - if limit >= 0 and end_scan - start >= remaining: - # Didn't find line ending, but reached length limit - break - - # No line ending seen yet - put aside current data - if end_scan > start: - s = line[start:end_scan] - builder.append(s) - # There may be some remaining chars we'll have to prepend to the # next chunk of data - if end_scan < len(line): - remnant = line[end_scan:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - self.decoded_chars_used = end_scan - if start > 0 or end_scan < len(line): - line = line[start:end_scan] - builder.append(line) - elif remnant: - builder.append(remnant) + self.decoded.reset() result = builder.build() return space.new_from_utf8(result) @@ -861,7 +899,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -886,7 +924,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. 
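For context, the hunks just above and below rework the seek/tell machinery: tell() records a "cookie" made of a safe byte position, the decoder state at that point and a count of characters to skip, and seek() replays it by rewinding the buffer, restoring the decoder, re-decoding and skipping. A rough standalone sketch of that idea, using only the stdlib incremental decoder (illustrative only, not code from this patch):

    import codecs

    # Sketch of the seek/tell cookie idea: remember a byte offset plus the
    # decoder state reached there, then on seek() rewind the byte stream,
    # restore the decoder, re-decode and skip a known number of characters.
    data = u"caf\xe9\nnext line\n".encode("utf-8")
    decoder = codecs.getincrementaldecoder("utf-8")()

    # tell(): record a safe starting byte position and the decoder state.
    start_pos = 0
    saved_state = decoder.getstate()
    chars_to_skip = 4            # characters already consumed past that point

    # seek(): go back to start_pos, restore the decoder, decode, then skip.
    decoder.setstate(saved_state)
    decoded = decoder.decode(data[start_pos:], True)
    assert decoded[chars_to_skip:] == u"\nnext line\n"
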
@@ -907,13 +945,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -939,7 +977,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -950,11 +988,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,6 +1,10 @@ -from hypothesis import given, strategies as st, assume +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") from pypy.module._io.interp_bytesio import W_BytesIO -from pypy.module._io.interp_textio import W_TextIOWrapper +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer LINESEP = ['', '\r', '\n', '\r\n'] @@ -31,3 +35,34 @@ else: break assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 21:40:16 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:16 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Reapply b89046216269 Message-ID: <5a1a2990.89ce1c0a.19604.fd42@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93178:52a6abae06e4 Date: 2017-11-26 01:58 +0000 http://bitbucket.org/pypy/pypy/changeset/52a6abae06e4/ Log: Reapply b89046216269 diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,6 +1760,10 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) + def unicode_w(self, w_obj): + # XXX: kill me! 
+ return w_obj.utf8_w(self).decode('utf-8') + def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,6 +212,12 @@ def newutf8(self, x, l, f): return w_some_obj() + def new_from_utf8(self, a): + return w_some_obj() + + def newunicode(self, a): + return w_some_obj() + newtext = newbytes newtext_or_none = newbytes newfilename = newbytes diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,10 +367,23 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding + def newunicode(self, unistr): + # XXX: kill me! + assert isinstance(unistr, unicode) + utf8s = unistr.encode("utf-8") + length, flag = rutf8.check_utf8(utf8s, True) + return self.newutf8(utf8s, length, flag) + def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Sat Nov 25 21:40:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:19 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Adapt DecodeBuffer to utf8 Message-ID: <5a1a2993.078bdf0a.27561.61d3@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93179:e509ec2ccea2 Date: 2017-11-26 01:51 +0000 http://bitbucket.org/pypy/pypy/changeset/e509ec2ccea2/ Log: Adapt DecodeBuffer to utf8 diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,7 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 +from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8, next_codepoint_pos STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -303,7 +303,7 @@ def set(self, space, w_decoded): check_decoded(space, w_decoded) - self.text = space.unicode_w(w_decoded) + self.text = space.utf8_w(w_decoded) self.pos = 0 def reset(self): @@ -312,7 +312,7 @@ def get_chars(self, size): if self.text is None: - return u"" + return "" available = len(self.text) - self.pos if size < 0 or size > available: @@ -341,7 +341,7 @@ if self.exhausted(): raise StopIteration ch = self.text[self.pos] - self.pos += 1 + self.pos = next_codepoint_pos(self.text, self.pos) return ch def peek_char(self): @@ -362,16 +362,16 @@ ch = self.next_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': return True - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: ch = self.peek_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': self.next_char() return True else: @@ -388,11 +388,11 @@ except StopIteration: return False scanned += 1 - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: - if self.peek_char() == u'\n': + if self.peek_char() == '\n': self.next_char() return True except StopIteration: @@ -705,11 +705,11 @@ else: if self.readtranslate: # Newlines are already translated, only 
search for \n - newline = u'\n' + newline = '\n' else: # Non-universal mode. newline = self.readnl - if newline == u'\r\n': + if newline == '\r\n': return self.decoded.find_crlf(limit) else: return self.decoded.find_char(newline[0], limit) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -38,31 +38,27 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text) - assert buf.get_chars(-1) == text + buf = DecodeBuffer(text.encode('utf-8')) + assert buf.get_chars(-1) == text.encode('utf-8') assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf-8')) strings = [] for n in sizes: s = buf.get_chars(n) if not buf.exhausted(): - assert len(s) == n + assert len(s.decode('utf-8')) == n else: - assert len(s) <= n + assert len(s.decode('utf-8')) <= n strings.append(s) - assert ''.join(strings) == text[:sum(sizes)] + assert ''.join(strings) == text[:sum(sizes)].encode('utf-8') @given(st.text()) def test_next_char(text): - buf = DecodeBuffer(text) - chars = [] - try: - while True: - chars.append(buf.next_char()) - except StopIteration: - pass + buf = DecodeBuffer(text.encode('utf-8')) + for i in range(len(text)): + ch = buf.next_char() + assert ch == text[i].encode('utf-8')[0] assert buf.exhausted() - assert u''.join(chars) == text From pypy.commits at gmail.com Sat Nov 25 21:40:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:21 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Fix seek() and tell() Message-ID: <5a1a2995.a1abdf0a.9c29.76f9@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93180:9ff11e92d368 Date: 2017-11-26 02:28 +0000 http://bitbucket.org/pypy/pypy/changeset/9ff11e92d368/ Log: Fix seek() and tell() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8, next_codepoint_pos +from rpython.rlib.rutf8 import ( + FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -420,6 +421,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -945,13 +947,14 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded.pos = cookie.chars_to_skip + self.decoded.set(space, w_decoded) + self.decoded.pos = w_decoded._index_to_byte(cookie.chars_to_skip) else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -963,10 +966,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not 
self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -992,7 +993,8 @@ # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded.pos + chars_to_skip = codepoints_in_utf8( + self.decoded.text, end=self.decoded.pos) # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. @@ -1036,14 +1038,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) From pypy.commits at gmail.com Sat Nov 25 21:40:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:40:23 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Convert stringio to utf8 Message-ID: <5a1a2997.0eef1c0a.3a2c3.828a@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93181:8a64a04eb505 Date: 2017-11-26 02:37 +0000 http://bitbucket.org/pypy/pypy/changeset/8a64a04eb505/ Log: Convert stringio to utf8 diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -17,20 +17,20 @@ if len(self.data) > newlength: self.data = self.data[:newlength] if len(self.data) < newlength: - self.data.extend([u'\0'] * (newlength - len(self.data))) + self.data.extend(['\0'] * (newlength - len(self.data))) def read(self, size): start = self.pos available = len(self.data) - start if available <= 0: - return u'' + return '' if size >= 0 and size <= available: end = start + size else: end = len(self.data) assert 0 <= start <= end self.pos = end - return u''.join(self.data[start:end]) + return ''.join(self.data[start:end]) def _convert_limit(self, limit): if limit < 0 or limit > len(self.data) - self.pos: @@ -58,7 +58,7 @@ else: break self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def readline(self, marker, limit): @@ -79,7 +79,7 @@ if not found: pos = end self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def write(self, string): @@ -99,7 +99,7 @@ self.resize(size) def getvalue(self): - return u''.join(self.data) + return ''.join(self.data) class W_StringIO(W_TextIOBase): @@ -118,10 +118,10 @@ if space.is_w(w_newline, space.w_None): newline = None else: - newline = space.unicode_w(w_newline) + newline = space.utf8_w(w_newline) - if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + if (newline is not None and newline != "" and newline != "\n" and + newline != "\r" and newline != "\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -131,9 +131,9 @@ ) if newline is not None: self.readnl = newline - self.readuniversal = newline is None or newline == u"" + self.readuniversal = newline is None or newline == "" self.readtranslate = newline is None - if newline and newline[0] == u"\r": + if newline and newline[0] == "\r": 
self.writenl = newline if self.readuniversal: self.w_decoder = space.call_function( @@ -152,7 +152,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY return space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -179,7 +179,7 @@ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely - initval = space.unicode_w(w_initval) + initval = space.utf8_w(w_initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, @@ -215,8 +215,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.newunicode(self.writenl)) - string = space.unicode_w(w_decoded) + space.newtext("\n"), space.new_from_utf8(self.writenl)) + string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +225,7 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.newunicode(self.buf.read(size)) + return space.new_from_utf8(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -235,11 +235,11 @@ else: if self.readtranslate: # Newlines are already translated, only search for \n - newline = u'\n' + newline = '\n' else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.newunicode(result) + return space.new_from_utf8(result) @unwrap_spec(pos=int, mode=int) @@ -276,7 +276,7 @@ def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(self.buf.getvalue()) + return space.new_from_utf8(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) From pypy.commits at gmail.com Sat Nov 25 21:42:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 18:42:51 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: kill test: _io needs the real space Message-ID: <5a1a2a2b.e4a6df0a.72dd5.12bb@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93182:e31d72c624a8 Date: 2017-11-26 02:40 +0000 http://bitbucket.org/pypy/pypy/changeset/e31d72c624a8/ Log: kill test: _io needs the real space diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py deleted file mode 100644 --- a/pypy/module/_io/test/test_ztranslation.py +++ /dev/null @@ -1,4 +0,0 @@ -from pypy.objspace.fake.checkmodule import checkmodule - -def test_checkmodule(): - checkmodule('_io') From pypy.commits at gmail.com Sat Nov 25 22:10:19 2017 From: pypy.commits at gmail.com (rlamy) Date: Sat, 25 Nov 2017 19:10:19 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a1a309b.8b8a1c0a.47f9e.61a7@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93183:8125ba2d1fc1 Date: 2017-11-26 03:02 +0000 http://bitbucket.org/pypy/pypy/changeset/8125ba2d1fc1/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,28 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + 
mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(txt, mode, limit): + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + lines = [] + while True: + line = textio.readline(limit) + if limit > 0: + assert len(line) < limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() @unwrap_spec(w_newline = WrappedDefault(u"\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. 
We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,88 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. 
- u"".join(self.buf), - start, - end - ) - if endpos < 0: - endpos = end - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -204,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -214,45 +214,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start - if self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 - if self.readtranslate: - # Newlines are already translated, only search for \n - newline = u'\n' - else: - # Non-universal mode. - newline = self.readnl - end_scan = end - len(newline) + 1 - for i in range(start, end_scan): - ch = line[i] - if ch == newline[0]: - for j in range(1, len(newline)): - if line[i + j] != newline[j]: - break - else: - return i + len(newline), 0 - return -1, end_scan - - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -343,6 +304,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -356,8 +437,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -546,44 +626,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.unicode_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return u"" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - - def _has_data(self): - return (self.decoded_chars is not None and - self.decoded_chars_used < len(self.decoded_chars)) - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -607,7 +653,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded func_name = "read1" if self.has_read1 else "read" w_input = space.call_method(self.w_buffer, func_name, space.newint(self.chunk_size)) @@ -622,7 +668,7 @@ eof = input_buf.getlength() == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -635,10 +681,10 @@ return not eof def _ensure_data(self, space): - while not self._has_data(): + while not self.decoded.has_data(): try: if not self._read_chunk(space): - self._unset_decoded() + self.decoded.reset() self.snapshot = None return False except OperationError as e: @@ -671,7 +717,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.newunicode(self._get_decoded_chars(-1)) + w_result = space.newunicode(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -683,83 +729,79 @@ while remaining > 0: if not self._ensure_data(space): break - data = self._get_decoded_chars(remaining) + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) return space.newunicode(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) + def readline_w(self, space, w_limit=None): self._check_attached(space) self._check_closed(space) self._writeflush(space) limit = convert_size(space, w_limit) - - line = None - remaining = None + remnant = None builder = UnicodeBuilder() - while True: # First, get some data if necessary has_data = self._ensure_data(space) if not has_data: # end of file - start = endpos = offset_to_buffer = 0 + if remnant: + builder.append(remnant) break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == u'\r\n' + assert self.decoded.pos == 0 + if remnant == u'\r' and self.decoded.text[0] == u'\n': + builder.append(u'\r\n') + self.decoded.pos = 1 + remnant = None + break + else: + builder.append(remnant) + remnant = None + continue + + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: - # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + if found or (limit >= 0 and builder.getlength() >= limit): break - # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] - builder.append(s) - - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] - builder.append(line) - elif remaining: - builder.append(remaining) + self.decoded.reset() result = builder.build() return space.newunicode(result) @@ -903,7 +945,7 @@ self._unsupportedoperation( space, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -933,7 +975,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. 
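As a reference point for the newline handling introduced above (DecodeBuffer.find_newline_universal and _scan_line_ending), the universal-newline rule is: any of \n, \r or \r\n terminates a line, and \r\n is never split because the decoder keeps the pair together. A small illustrative helper, not part of the patch, that applies the same rule to a complete string:

    def split_universal(text):
        # Split `text` into lines, keeping the line endings, treating any of
        # \n, \r or \r\n as a terminator and never splitting a \r\n pair.
        lines, start, i = [], 0, 0
        while i < len(text):
            ch = text[i]
            i += 1
            if ch == u'\n':
                lines.append(text[start:i])
                start = i
            elif ch == u'\r':
                if i < len(text) and text[i] == u'\n':
                    i += 1
                lines.append(text[start:i])
                start = i
        if start < len(text):
            lines.append(text[start:])    # trailing data without a newline
        return lines

    assert split_universal(u"a\rb\r\nc\nd") == [u"a\r", u"b\r\n", u"c\n", u"d"]
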
@@ -954,13 +996,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -987,7 +1029,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -998,11 +1040,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,68 @@ +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text From pypy.commits at gmail.com Sun Nov 26 16:24:20 2017 From: pypy.commits at gmail.com (mattip) Date: Sun, 26 Nov 2017 13:24:20 -0800 (PST) Subject: [pypy-commit] pypy default: DOC: how to hack your win32 environment to build cffi modules without 
 setuptools
Message-ID: <5a1b3104.9085df0a.341f4.29e4@mx.google.com>

Author: Matti Picus
Branch: 
Changeset: r93184:7b43c9a3c3d2
Date: 2017-11-26 23:23 +0200
http://bitbucket.org/pypy/pypy/changeset/7b43c9a3c3d2/

Log: DOC: how to hack your win32 environment to build cffi modules without setuptools

diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst
--- a/pypy/doc/windows.rst
+++ b/pypy/doc/windows.rst
@@ -25,8 +25,10 @@
 This compiler, while the standard one for Python 2.7, is deprecated. Microsoft
 has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_
 (the link
-was checked in Nov 2016). Note that the compiler suite will be installed in
-``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``.
+was checked in Nov 2016). Note that the compiler suite may be installed in
+``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``
+or in
+``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``.
 A current version of ``setuptools`` will be able to find it there. For
 Windows 10, you must right-click the download, and under ``Properties`` ->
 ``Compatibility`` mark it as ``Run this program in compatibility mode for``
@@ -41,7 +43,6 @@
 -----------------------------------

 We routinely test translation using v9, also known as Visual Studio 2008.
-Our buildbot is still using the Express Edition, not the compiler noted above.
 Other configurations may work as well.

 The translation scripts will set up the appropriate environment variables
@@ -81,6 +82,30 @@

 .. _build instructions: http://pypy.org/download.html#building-from-source

+Setting Up Visual Studio for building SSL in Python3
+----------------------------------------------------
+
+On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after
+translation. However ``distutils`` does not support the Microsoft-provided Visual C
+compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The
+traditional solution to this problem is to install the ``setuptools`` module
+via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However
+``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on
+``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which
+depends on ``ssl``.
+
+In order to solve this, the buildbot sets an environment variable that helps
+``distutils`` find the compiler without ``setuptools``::
+
+    set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin
+
+or whatever is appropriate for your machine. Note that this is not enough, you
+must also copy the ``vcvarsall.bat`` file from the ``...\9.0`` directory to the
+``...\9.0\VC`` directory, and edit it, changing the lines that set
+``VCINSTALLDIR`` and ``WindowsSdkDir``::
+    set VCINSTALLDIR=%~dp0\
+    set WindowsSdkDir=%~dp0\..\WinSDK\
+
 Preparing Windows for the large build
 -------------------------------------

From pypy.commits at gmail.com  Mon Nov 27 16:17:18 2017
From: pypy.commits at gmail.com (arigo)
Date: Mon, 27 Nov 2017 13:17:18 -0800 (PST)
Subject: [pypy-commit] pypy unicode-utf8: merge heads
Message-ID: <5a1c80de.5d87df0a.a0b86.e9f5@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r93186:350cb9b5b92b
Date: 2017-11-27 22:16 +0100
http://bitbucket.org/pypy/pypy/changeset/350cb9b5b92b/

Log: merge heads

diff too long, truncating to 2000 out of 2094 lines

diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -9,3 +9,5 @@
 * remove assertions from W_UnicodeObject.__init__ if all the builders pass
 * what to do with error handlers that go backwards. There were tests
   in test_codecs that would check for that
+
+* fix _pypyjson to not use a wrapped dict when decoding an object
diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py
new file mode 100644
--- /dev/null
+++ b/extra_tests/test_textio.py
@@ -0,0 +1,28 @@
+from hypothesis import given, strategies as st
+
+from io import BytesIO, TextIOWrapper
+
+LINESEP = ['', '\r', '\n', '\r\n']
+
+ at st.composite
+def text_with_newlines(draw):
+    sep = draw(st.sampled_from(LINESEP))
+    lines = draw(st.lists(st.text(max_size=10), max_size=10))
+    return sep.join(lines)
+
+ at given(txt=text_with_newlines(),
+       mode=st.sampled_from(['\r', '\n', '\r\n', '']),
+       limit=st.integers(min_value=-1))
+def test_readline(txt, mode, limit):
+    textio = TextIOWrapper(
+        BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode)
+    lines = []
+    while True:
+        line = textio.readline(limit)
+        if limit > 0:
+            assert len(line) < limit
+        if line:
+            lines.append(line)
+        else:
+            break
+    assert u''.join(lines) == txt
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1760,10 +1760,6 @@
     def utf8_w(self, w_obj):
         return w_obj.utf8_w(self)

-    def unicode_w(self, w_obj):
-        # XXX: kill me!
- return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -2,21 +2,115 @@ from pypy.interpreter.typedef import ( TypeDef, generic_new_descr, GetSetProperty) from pypy.interpreter.gateway import interp2app, unwrap_spec, WrappedDefault -from pypy.module._io.interp_textio import W_TextIOBase, W_IncrementalNewlineDecoder +from pypy.module._io.interp_textio import ( + W_TextIOBase, W_IncrementalNewlineDecoder) from pypy.module._io.interp_iobase import convert_size +class UnicodeIO(object): + def __init__(self, data=None, pos=0): + if data is None: + data = [] + self.data = data + self.pos = pos + + def resize(self, newlength): + if len(self.data) > newlength: + self.data = self.data[:newlength] + if len(self.data) < newlength: + self.data.extend([u'\0'] * (newlength - len(self.data))) + + def read(self, size): + start = self.pos + available = len(self.data) - start + if available <= 0: + return u'' + if size >= 0 and size <= available: + end = start + size + else: + end = len(self.data) + assert 0 <= start <= end + self.pos = end + return u''.join(self.data[start:end]) + + def _convert_limit(self, limit): + if limit < 0 or limit > len(self.data) - self.pos: + limit = len(self.data) - self.pos + assert limit >= 0 + return limit + + def readline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + limit = self._convert_limit(limit) + start = self.pos + end = start + limit + pos = start + while pos < end: + ch = self.data[pos] + pos += 1 + if ch == '\n': + break + if ch == '\r': + if pos >= end: + break + if self.data[pos] == '\n': + pos += 1 + break + else: + break + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def readline(self, marker, limit): + start = self.pos + limit = self._convert_limit(limit) + end = start + limit + found = False + for pos in range(start, end - len(marker) + 1): + ch = self.data[pos] + if ch == marker[0]: + for j in range(1, len(marker)): + if self.data[pos + j] != marker[j]: + break # from inner loop + else: + pos += len(marker) + found = True + break + if not found: + pos = end + self.pos = pos + result = u''.join(self.data[start:pos]) + return result + + def write(self, string): + length = len(string) + if self.pos + length > len(self.data): + self.resize(self.pos + length) + + for i in range(length): + self.data[self.pos + i] = string[i] + self.pos += length + + def seek(self, pos): + self.pos = pos + + def truncate(self, size): + if size < len(self.data): + self.resize(size) + + def getvalue(self): + return u''.join(self.data) + class W_StringIO(W_TextIOBase): def __init__(self, space): W_TextIOBase.__init__(self, space) - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() - @unwrap_spec(w_newline = WrappedDefault("\n")) + @unwrap_spec(w_newline=WrappedDefault("\n")) def descr_init(self, space, w_initvalue=None, w_newline=None): # In case __init__ is called multiple times - self.buf = [] - self.pos = 0 + self.buf = UnicodeIO() self.w_decoder = None self.readnl = None self.writenl = None @@ -27,7 +121,7 @@ newline = space.unicode_w(w_newline) if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + newline != u"\r" and newline != u"\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -50,7 +144,7 @@ if not space.is_none(w_initvalue): self.write_w(space, w_initvalue) - self.pos = 0 + self.buf.pos = 0 def descr_getstate(self, space): w_initialval = self.getvalue_w(space) @@ -58,9 +152,9 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.newunicode(self.readnl)) # YYY return space.newtuple([ - w_initialval, w_readnl, space.newint(self.pos), w_dict + w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) def descr_setstate(self, space, w_state): @@ -69,34 +163,33 @@ # We allow the state tuple to be longer than 4, because we may need # someday to extend the object's state without breaking # backwards-compatibility - if not space.isinstance_w(w_state, space.w_tuple) or space.len_w(w_state) < 4: + if (not space.isinstance_w(w_state, space.w_tuple) + or space.len_w(w_state) < 4): raise oefmt(space.w_TypeError, "%T.__setstate__ argument should be a 4-tuple, got %T", self, w_state) w_initval, w_readnl, w_pos, w_dict = space.unpackiterable(w_state, 4) + if not space.isinstance_w(w_initval, space.w_unicode): + raise oefmt(space.w_TypeError, + "unicode argument expected, got '%T'", w_initval) # Initialize state - self.descr_init(space, w_initval, w_readnl) + self.descr_init(space, None, w_readnl) - # Restore the buffer state. Even if __init__ did initialize the buffer, - # we have to initialize it again since __init__ may translates the - # newlines in the inital_value string. 
We clearly do not want that + # Restore the buffer state. We're not doing it via __init__ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely initval = space.unicode_w(w_initval) - size = len(initval) - self.resize_buffer(size) - self.buf = list(initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, "position value cannot be negative") - self.pos = pos + self.buf = UnicodeIO(list(initval), pos) if not space.is_w(w_dict, space.w_None): if not space.isinstance_w(w_dict, space.w_dict): - raise oefmt(space.w_TypeError, - "fourth item of state should be a dict, got a %T", - w_dict) + raise oefmt( + space.w_TypeError, + "fourth item of state should be a dict, got a %T", w_dict) # Alternatively, we could replace the internal dictionary # completely. However, it seems more practical to just update it. space.call_method(self.w_dict, "update", w_dict) @@ -107,88 +200,47 @@ message = "I/O operation on closed file" raise OperationError(space.w_ValueError, space.newtext(message)) - def resize_buffer(self, newlength): - if len(self.buf) > newlength: - self.buf = self.buf[:newlength] - if len(self.buf) < newlength: - self.buf.extend([u'\0'] * (newlength - len(self.buf))) - - def write(self, string): - length = len(string) - if self.pos + length > len(self.buf): - self.resize_buffer(self.pos + length) - - for i in range(length): - self.buf[self.pos + i] = string[i] - self.pos += length - def write_w(self, space, w_obj): if not space.isinstance_w(w_obj, space.w_unicode): raise oefmt(space.w_TypeError, "unicode argument expected, got '%T'", w_obj) self._check_closed(space) - orig_size = space.len_w(w_obj) if self.w_decoder is not None: w_decoded = space.call_method( - self.w_decoder, "decode", w_obj, space.w_True - ) + self.w_decoder, "decode", w_obj, space.w_True) else: w_decoded = w_obj - if self.writenl: w_decoded = space.call_method( - w_decoded, "replace", space.newtext("\n"), space.newunicode(self.writenl) - ) + w_decoded, "replace", + space.newtext("\n"), space.newunicode(self.writenl)) + string = space.unicode_w(w_decoded) + if string: + self.buf.write(string) - string = space.unicode_w(w_decoded) - size = len(string) - - if size: - self.write(string) return space.newint(orig_size) def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - start = self.pos - available = len(self.buf) - start - if available <= 0: - return space.newunicode(u"") - if size >= 0 and size <= available: - end = start + size - else: - end = len(self.buf) - assert 0 <= start <= end - self.pos = end - return space.newunicode(u''.join(self.buf[start:end])) + return space.newunicode(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) limit = convert_size(space, w_limit) + if self.readuniversal: + result = self.buf.readline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + newline = self.readnl + result = self.buf.readline(newline, limit) + return space.newunicode(result) - if self.pos >= len(self.buf): - return space.newunicode(u"") - - start = self.pos - if limit < 0 or limit > len(self.buf) - self.pos: - limit = len(self.buf) - self.pos - - assert limit >= 0 - end = start + limit - - endpos, consumed = self._find_line_ending( - # XXX: super inefficient, makes a copy of the entire contents. 
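# For reference, the behaviour that W_StringIO re-implements at interp
# level can be observed with CPython's own io.StringIO (behavioural
# illustration only, not PyPy code):
import io
sio = io.StringIO(u'a\r\nb\rc\n', newline=None)    # universal newlines
assert sio.read() == u'a\nb\nc\n'                  # \r and \r\n become \n
sio2 = io.StringIO(u'one\ntwo\n')
assert sio2.readline() == u'one\n'
assert sio2.readline(2) == u'tw'                   # a limit cuts the line short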
- u"".join(self.buf), - start, - end - ) - if endpos < 0: - endpos = end - assert endpos >= 0 - self.pos = endpos - return space.newunicode(u"".join(self.buf[start:endpos])) @unwrap_spec(pos=int, mode=int) def seek_w(self, space, pos, mode=0): @@ -204,32 +256,27 @@ # XXX: this makes almost no sense, but its how CPython does it. if mode == 1: - pos = self.pos + pos = self.buf.pos elif mode == 2: - pos = len(self.buf) - + pos = len(self.buf.data) assert pos >= 0 - self.pos = pos + self.buf.seek(pos) return space.newint(pos) def truncate_w(self, space, w_size=None): self._check_closed(space) if space.is_none(w_size): - size = self.pos + size = self.buf.pos else: size = space.int_w(w_size) - if size < 0: raise oefmt(space.w_ValueError, "Negative size value %d", size) - - if size < len(self.buf): - self.resize_buffer(size) - + self.buf.truncate(size) return space.newint(size) def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(u''.join(self.buf)) + return space.newunicode(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -221,46 +221,6 @@ def newlines_get_w(self, space): return space.w_None - def _find_line_ending(self, line, start, end): - size = end - start - if self.readtranslate: - # Newlines are already translated, only search for \n - pos = line.find('\n', start, end) - if pos >= 0: - return pos + 1, 0 - else: - return -1, size - elif self.readuniversal: - # Universal newline search. Find any of \r, \r\n, \n - # The decoder ensures that \r\n are not split in two pieces - i = start - while True: - # Fast path for non-control chars. - while i < end and line[i] > '\r': - i += 1 - if i >= end: - return -1, size - ch = line[i] - i += 1 - if ch == '\n': - return i, 0 - if ch == '\r': - if line[i] == '\n': - return i + 1, 0 - else: - return i, 0 - else: - # Non-universal mode. - pos = line.find(self.readnl, start, end) - if pos >= 0: - return pos + len(self.readnl), 0 - else: - pos = line.find(self.readnl[0], start, end) - if pos >= 0: - return -1, pos - start - return -1, size - - W_TextIOBase.typedef = TypeDef( '_io._TextIOBase', W_IOBase.typedef, __new__ = generic_new_descr(W_TextIOBase), @@ -336,6 +296,126 @@ self.input = input +class DecodeBuffer(object): + def __init__(self, text=None): + self.text = text + self.pos = 0 + + def set(self, space, w_decoded): + check_decoded(space, w_decoded) + self.text = space.unicode_w(w_decoded) + self.pos = 0 + + def reset(self): + self.text = None + self.pos = 0 + + def get_chars(self, size): + if self.text is None: + return u"" + + available = len(self.text) - self.pos + if size < 0 or size > available: + size = available + assert size >= 0 + + if self.pos > 0 or size < available: + start = self.pos + end = self.pos + size + assert start >= 0 + assert end >= 0 + chars = self.text[start:end] + else: + chars = self.text + + self.pos += size + return chars + + def has_data(self): + return (self.text is not None and not self.exhausted()) + + def exhausted(self): + return self.pos >= len(self.text) + + def next_char(self): + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + self.pos += 1 + return ch + + def peek_char(self): + # like next_char, but doesn't advance pos + if self.exhausted(): + raise StopIteration + ch = self.text[self.pos] + return ch + + def find_newline_universal(self, limit): + # Universal newline search. 
Find any of \r, \r\n, \n + # The decoder ensures that \r\n are not split in two pieces + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == u'\n': + return True + if ch == u'\r': + if scanned >= limit: + return False + try: + ch = self.peek_char() + except StopIteration: + return False + if ch == u'\n': + self.next_char() + return True + else: + return True + return False + + def find_crlf(self, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + scanned += 1 + if ch == u'\r': + if scanned >= limit: + return False + try: + if self.peek_char() == u'\n': + self.next_char() + return True + except StopIteration: + # This is the tricky case: we found a \r right at the end + self.pos -= 1 + return False + return False + + def find_char(self, marker, limit): + if limit < 0: + limit = sys.maxint + scanned = 0 + while scanned < limit: + try: + ch = self.next_char() + except StopIteration: + return False + if ch == marker: + return True + scanned += 1 + return False + + def check_decoded(space, w_decoded): if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" @@ -349,8 +429,7 @@ self.w_encoder = None self.w_decoder = None - self.decoded_chars = None # buffer for text returned from decoder - self.decoded_chars_used = 0 # offset into _decoded_chars for read() + self.decoded = DecodeBuffer() self.pending_bytes = None # list of bytes objects waiting to be # written, or NULL self.chunk_size = 8192 @@ -518,40 +597,10 @@ # _____________________________________________________________ # read methods - def _unset_decoded(self): - self.decoded_chars = None - self.decoded_chars_used = 0 - - def _set_decoded(self, space, w_decoded): - check_decoded(space, w_decoded) - self.decoded_chars = space.utf8_w(w_decoded) - self.decoded_chars_used = 0 - - def _get_decoded_chars(self, size): - if self.decoded_chars is None: - return "" - - available = len(self.decoded_chars) - self.decoded_chars_used - if size < 0 or size > available: - size = available - assert size >= 0 - - if self.decoded_chars_used > 0 or size < available: - start = self.decoded_chars_used - end = self.decoded_chars_used + size - assert start >= 0 - assert end >= 0 - chars = self.decoded_chars[start:end] - else: - chars = self.decoded_chars - - self.decoded_chars_used += size - return chars - def _read_chunk(self, space): """Read and decode the next chunk of data from the BufferedReader. The return value is True unless EOF was reached. The decoded string - is placed in self._decoded_chars (replacing its previous value). + is placed in self.decoded (replacing its previous value). 
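# Minimal plain-Python sketch (illustrative, not the RPython class) of the
# DecodeBuffer cursor protocol used above, showing the tricky find_crlf()
# case: a '\r' sitting at the very end of the decoded text is pushed back,
# so that a '\r\n' pair split across two chunks is never broken in half.
class _DecodeBufferSketch(object):
    def __init__(self, text=u''):
        self.text = text
        self.pos = 0

    def get_chars(self, size):
        available = len(self.text) - self.pos
        if size < 0 or size > available:
            size = available
        chars = self.text[self.pos:self.pos + size]
        self.pos += size
        return chars

    def find_crlf(self):
        # no `limit` handling here, unlike the real method
        while self.pos < len(self.text):
            ch = self.text[self.pos]
            self.pos += 1
            if ch == u'\r':
                if self.pos < len(self.text):
                    if self.text[self.pos] == u'\n':
                        self.pos += 1
                        return True
                else:
                    self.pos -= 1      # push the trailing '\r' back
                    return False
        return False

# buf = _DecodeBufferSketch(u'abc\r'); buf.find_crlf() -> False; buf.pos == 3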
The entire input chunk is sent to the decoder, though some of it may remain buffered in the decoder, yet to be converted.""" @@ -571,7 +620,7 @@ dec_buffer = None dec_flags = 0 - # Read a chunk, decode it, and put the result in self._decoded_chars + # Read a chunk, decode it, and put the result in self.decoded w_input = space.call_method(self.w_buffer, "read1", space.newint(self.chunk_size)) @@ -583,7 +632,7 @@ eof = space.len_w(w_input) == 0 w_decoded = space.call_method(self.w_decoder, "decode", w_input, space.newbool(eof)) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) if space.len_w(w_decoded) > 0: eof = False @@ -595,6 +644,19 @@ return not eof + def _ensure_data(self, space): + while not self.decoded.has_data(): + try: + if not self._read_chunk(space): + self.decoded.reset() + self.snapshot = None + return False + except OperationError as e: + if trap_eintr(space, e): + continue + raise + return True + def next_w(self, space): self._check_attached(space) self.telling = False @@ -619,7 +681,7 @@ w_bytes = space.call_method(self.w_buffer, "read") w_decoded = space.call_method(self.w_decoder, "decode", w_bytes, space.w_True) check_decoded(space, w_decoded) - w_result = space.new_from_utf8(self._get_decoded_chars(-1)) + w_result = space.new_from_utf8(self.decoded.get_chars(-1)) w_final = space.add(w_result, w_decoded) self.snapshot = None return w_final @@ -628,24 +690,29 @@ builder = StringBuilder(size) # Keep reading chunks until we have n characters to return - while True: - data = self._get_decoded_chars(remaining) + while remaining > 0: + if not self._ensure_data(space): + break + data = self.decoded.get_chars(remaining) builder.append(data) remaining -= len(data) - if remaining <= 0: # Done - break + return space.new_from_utf8(builder.build()) - try: - if not self._read_chunk(space): - # EOF - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise - - return space.new_from_utf8(builder.build()) + def _scan_line_ending(self, limit): + if self.readuniversal: + return self.decoded.find_newline_universal(limit) + else: + if self.readtranslate: + # Newlines are already translated, only search for \n + newline = u'\n' + else: + # Non-universal mode. 
+ newline = self.readnl + if newline == u'\r\n': + return self.decoded.find_crlf(limit) + else: + return self.decoded.find_char(newline[0], limit) def readline_w(self, space, w_limit=None): self._check_attached(space) @@ -653,82 +720,52 @@ self._writeflush(space) limit = convert_size(space, w_limit) - - line = None - remaining = None + remnant = None builder = StringBuilder() - while True: # First, get some data if necessary - has_data = True - while not self.decoded_chars: - try: - if not self._read_chunk(space): - has_data = False - break - except OperationError as e: - if trap_eintr(space, e): - continue - raise + has_data = self._ensure_data(space) if not has_data: # end of file - self._unset_decoded() - self.snapshot = None - start = endpos = offset_to_buffer = 0 + if remnant: + builder.append(remnant) break - if not remaining: - line = self.decoded_chars - start = self.decoded_chars_used - offset_to_buffer = 0 + if remnant: + assert not self.readtranslate and self.readnl == '\r\n' + assert self.decoded.pos == 0 + if remnant == '\r' and self.decoded.text[0] == '\n': + builder.append('\r\n') + self.decoded.pos = 1 + remnant = None + break + else: + builder.append(remnant) + remnant = None + continue + + if limit > 0: + remaining = limit - builder.getlength() + assert remaining >= 0 else: - assert self.decoded_chars_used == 0 - line = remaining + self.decoded_chars - start = 0 - offset_to_buffer = len(remaining) - remaining = None + remaining = -1 + start = self.decoded.pos + assert start >= 0 + found = self._scan_line_ending(remaining) + end_scan = self.decoded.pos + if end_scan > start: + s = self.decoded.text[start:end_scan] + builder.append(s) - line_len = len(line) - endpos, consumed = self._find_line_ending(line, start, line_len) - chunked = builder.getlength() - if endpos >= 0: - if limit >= 0 and endpos >= start + limit - chunked: - endpos = start + limit - chunked - assert endpos >= 0 - break - assert consumed >= 0 - - # We can put aside up to `endpos` - endpos = consumed + start - if limit >= 0 and endpos >= start + limit - chunked: - # Didn't find line ending, but reached length limit - endpos = start + limit - chunked - assert endpos >= 0 + if found or (limit >= 0 and builder.getlength() >= limit): break - # No line ending seen yet - put aside current data - if endpos > start: - s = line[start:endpos] - builder.append(s) - - # There may be some remaining bytes we'll have to prepend to the + # There may be some remaining chars we'll have to prepend to the # next chunk of data - if endpos < line_len: - remaining = line[endpos:] - line = None + if not self.decoded.exhausted(): + remnant = self.decoded.get_chars(-1) # We have consumed the buffer - self._unset_decoded() - - if line: - # Our line ends in the current buffer - decoded_chars_used = endpos - offset_to_buffer - assert decoded_chars_used >= 0 - self.decoded_chars_used = decoded_chars_used - if start > 0 or endpos < len(line): - line = line[start:endpos] - builder.append(line) - elif remaining: - builder.append(remaining) + self.decoded.reset() result = builder.build() return space.new_from_utf8(result) @@ -862,7 +899,7 @@ raise oefmt(space.w_IOError, "can't do nonzero end-relative seeks") space.call_method(self, "flush") - self._unset_decoded() + self.decoded.reset() self.snapshot = None if self.w_decoder: space.call_method(self.w_decoder, "reset") @@ -887,7 +924,7 @@ # Seek back to the safe start point space.call_method(self.w_buffer, "seek", space.newint(cookie.start_pos)) - self._unset_decoded() + 
self.decoded.reset() self.snapshot = None # Restore the decoder to its state from the safe start point. @@ -908,13 +945,13 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self._set_decoded(space, w_decoded) + self.decoded.set(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded_chars) < cookie.chars_to_skip: + if len(self.decoded.text) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded_chars_used = cookie.chars_to_skip + self.decoded.pos = cookie.chars_to_skip else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -940,7 +977,7 @@ w_pos = space.call_method(self.w_buffer, "tell") if self.w_decoder is None or self.snapshot is None: - assert not self.decoded_chars + assert not self.decoded.text return w_pos cookie = PositionCookie(space.bigint_w(w_pos)) @@ -951,11 +988,11 @@ cookie.start_pos -= len(input) # How many decoded characters have been used up since the snapshot? - if not self.decoded_chars_used: + if not self.decoded.pos: # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded_chars_used + chars_to_skip = self.decoded.pos # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py new file mode 100644 --- /dev/null +++ b/pypy/module/_io/test/test_interp_textio.py @@ -0,0 +1,68 @@ +import pytest +try: + from hypothesis import given, strategies as st, assume +except ImportError: + pytest.skip("hypothesis required") +from pypy.module._io.interp_bytesio import W_BytesIO +from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer + +LINESEP = ['', '\r', '\n', '\r\n'] + + at st.composite +def text_with_newlines(draw): + sep = draw(st.sampled_from(LINESEP)) + lines = draw(st.lists(st.text(max_size=10), max_size=10)) + return sep.join(lines) + + at given(txt=text_with_newlines(), + mode=st.sampled_from(['\r', '\n', '\r\n', '']), + limit=st.integers(min_value=-1)) +def test_readline(space, txt, mode, limit): + assume(limit != 0) + w_stream = W_BytesIO(space) + w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) + w_textio = W_TextIOWrapper(space) + w_textio.descr_init( + space, w_stream, encoding='utf-8', + w_newline=space.newtext(mode)) + lines = [] + while True: + line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + if limit > 0: + assert len(line) <= limit + if line: + lines.append(line) + else: + break + assert u''.join(lines) == txt + + at given(st.text()) +def test_read_buffer(text): + buf = DecodeBuffer(text) + assert buf.get_chars(-1) == text + assert buf.exhausted() + + at given(st.text(), st.lists(st.integers(min_value=0))) +def test_readn_buffer(text, sizes): + buf = DecodeBuffer(text) + strings = [] + for n in sizes: + s = buf.get_chars(n) + if not buf.exhausted(): + assert len(s) == n + else: + assert len(s) <= n + strings.append(s) + assert ''.join(strings) == text[:sum(sizes)] + + at given(st.text()) +def test_next_char(text): + buf = DecodeBuffer(text) + chars = [] + try: + while True: + chars.append(buf.next_char()) + except StopIteration: + pass + assert buf.exhausted() + assert u''.join(chars) == text diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py --- a/pypy/module/_multibytecodec/c_codecs.py +++ 
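# The same property-based pattern as test_readline above, written against
# CPython's stdlib classes instead of the interp-level W_TextIOWrapper
# (illustrative only; needs the `hypothesis` package):
import io
from hypothesis import given, strategies as st, assume

@given(txt=st.text(alphabet=u'ab\r\n', max_size=30),
       limit=st.integers(min_value=-1, max_value=10))
def test_readline_reassembles_input(txt, limit):
    assume(limit != 0)
    f = io.TextIOWrapper(io.BytesIO(txt.encode('utf-8')),
                         encoding='utf-8', newline='')   # no translation
    pieces = []
    while True:
        line = f.readline(limit)
        if limit > 0:
            assert len(line) <= limit
        if not line:
            break
        pieces.append(line)
    assert u''.join(pieces) == txt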
b/pypy/module/_multibytecodec/c_codecs.py @@ -197,19 +197,21 @@ MBENC_FLUSH = 1 MBENC_RESET = 2 -def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None): +def encode(codec, unicodedata, length, errors="strict", errorcb=None, + namecb=None): encodebuf = pypy_cjk_enc_new(codec) if not encodebuf: raise MemoryError try: - return encodeex(encodebuf, unicodedata, errors, errorcb, namecb) + return encodeex(encodebuf, unicodedata, length, errors, errorcb, namecb) finally: pypy_cjk_enc_free(encodebuf) -def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, +def encodeex(encodebuf, utf8data, length, errors="strict", errorcb=None, namecb=None, ignore_error=0): - inleft = len(unicodedata) - with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf: + inleft = length + inbuf = rffi.utf82wcharp(utf8data, length) + try: if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0: raise MemoryError if ignore_error == 0: @@ -221,16 +223,18 @@ if r == 0 or r == ignore_error: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) while flags & MBENC_RESET: r = pypy_cjk_enc_reset(encodebuf) if r == 0: break multibytecodec_encerror(encodebuf, r, errors, - errorcb, namecb, unicodedata) + errorcb, namecb, utf8data) src = pypy_cjk_enc_outbuf(encodebuf) length = pypy_cjk_enc_outlen(encodebuf) return rffi.charpsize2str(src, length) + finally: + lltype.free(inbuf, flavor='raw') def multibytecodec_encerror(encodebuf, e, errors, errorcb, namecb, unicodedata): @@ -256,21 +260,16 @@ elif errors == "replace": codec = pypy_cjk_enc_getcodec(encodebuf) try: - replace = encode(codec, u"?") + replace = encode(codec, "?", 1) except EncodeDecodeError: replace = "?" else: assert errorcb - XXX - retu, rets, end = errorcb(errors, namecb, reason, - unicodedata.encode("utf8"), start, end) - if rets is not None: - # py3k only - replace = rets - else: - assert retu is not None - codec = pypy_cjk_enc_getcodec(encodebuf) - replace = encode(codec, retu, "strict", errorcb, namecb) + rets, end = errorcb(errors, namecb, reason, + unicodedata, start, end) + codec = pypy_cjk_enc_getcodec(encodebuf) + lgt, _ = rutf8.get_utf8_length_flag(rets) + replace = encode(codec, rets, lgt, "strict", errorcb, namecb) with rffi.scoped_nonmovingbuffer(replace) as inbuf: r = pypy_cjk_enc_replace_on_error(encodebuf, inbuf, len(replace), end) if r == MBERR_NOMEMORY: diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py --- a/pypy/module/_multibytecodec/interp_incremental.py +++ b/pypy/module/_multibytecodec/interp_incremental.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype +from rpython.rlib import rutf8 from pypy.module._multibytecodec import c_codecs from pypy.module._multibytecodec.interp_multibytecodec import ( MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror, @@ -65,7 +66,8 @@ pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf) assert 0 <= pos <= len(object) self.pending = object[pos:] - return space.newunicode(output) + lgt, flag = rutf8.get_utf8_length_flag(output) + return space.newutf8(output, lgt, flag) @unwrap_spec(errors="text_or_none") @@ -88,7 +90,8 @@ def _initialize(self): self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec) - self.pending = u"" + self.pending = "" + self.pending_len = 0 def _free(self): self.pending = None @@ -96,25 +99,37 @@ c_codecs.pypy_cjk_enc_free(self.encodebuf) self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO) - 
@unwrap_spec(object='utf8', final=bool) - def encode_w(self, object, final=False): - u_object = object.decode('utf8') + @unwrap_spec(final=bool) + def encode_w(self, space, w_object, final=False): + utf8data, length = space.utf8_len_w(w_object) space = self.space state = space.fromcache(CodecState) if len(self.pending) > 0: - u_object = self.pending + u_object + utf8data = self.pending + utf8data + length += self.pending_len try: - output = c_codecs.encodeex(self.encodebuf, u_object, self.errors, + output = c_codecs.encodeex(self.encodebuf, utf8data, length, + self.errors, state.encode_error_handler, self.name, get_ignore_error(final)) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, object, len(u_object), + raise wrap_unicodeencodeerror(space, e, utf8data, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf) - assert 0 <= pos <= len(u_object) - self.pending = u_object[pos:] + assert 0 <= pos <= length + # scan the utf8 string until we hit pos + i = 0 + stop = length - pos + self.pending_len = stop + if stop > 0: + while pos > 0: + i = rutf8.next_codepoint_pos(utf8data, i) + pos -= 1 + self.pending = utf8data[i:] + else: + self.pending = "" return space.newbytes(output) diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py --- a/pypy/module/_multibytecodec/interp_multibytecodec.py +++ b/pypy/module/_multibytecodec/interp_multibytecodec.py @@ -31,23 +31,23 @@ return space.newtuple([space.newutf8(utf8_output, lgt, flag), space.newint(len(input))]) - @unwrap_spec(input='utf8', errors="text_or_none") - def encode(self, space, input, errors=None): + @unwrap_spec(errors="text_or_none") + def encode(self, space, w_input, errors=None): if errors is None: errors = 'strict' state = space.fromcache(CodecState) + input, length = space.utf8_len_w(w_input) # - u_input = input.decode('utf8') try: - output = c_codecs.encode(self.codec, u_input, errors, + output = c_codecs.encode(self.codec, input, length, errors, state.encode_error_handler, self.name) except c_codecs.EncodeDecodeError as e: - raise wrap_unicodeencodeerror(space, e, input, len(u_input), + raise wrap_unicodeencodeerror(space, e, input, length, self.name) except RuntimeError: raise wrap_runtimeerror(space) return space.newtuple([space.newbytes(output), - space.newint(len(u_input))]) + space.newint(length)]) MultibyteCodec.typedef = TypeDef( diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py --- a/pypy/module/_multibytecodec/test/test_c_codecs.py +++ b/pypy/module/_multibytecodec/test/test_c_codecs.py @@ -14,27 +14,27 @@ def test_decode_gbk(): c = getcodec("gbk") u = decode(c, "\xA1\xAA") - assert u == unichr(0x2014) + assert u == unichr(0x2014).encode('utf8') u = decode(c, "foobar") - assert u == u"foobar" + assert u == "foobar" def test_decode_hz(): # stateful c = getcodec("hz") u = decode(c, "~{abc}") - assert u == u'\u5f95\u6cef' + assert u == u'\u5f95\u6cef'.encode('utf8') u = decode(c, "~{") - assert u == u'' + assert u == '' def test_decodeex_hz(): c = getcodec("hz") decodebuf = c_codecs.pypy_cjk_dec_new(c) u = c_codecs.decodeex(decodebuf, "~{abcd~}") - assert u == u'\u5f95\u6c85' + assert u == u'\u5f95\u6c85'.encode('utf8') u = c_codecs.decodeex(decodebuf, "~{efgh~}") - assert u == u'\u5f50\u73b7' + assert u == u'\u5f50\u73b7'.encode('utf8') u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh") - 
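# Plain-Python sketch of the "pending" bookkeeping done by encode_w()
# above: given a valid UTF-8 byte string and the number of code points the
# codec consumed, find where the unconsumed tail starts.  This stands in
# for rutf8.next_codepoint_pos; the helper name is made up.
def _utf8_tail_after(utf8data, consumed_codepoints):
    i = 0
    while consumed_codepoints > 0:
        first = ord(utf8data[i])
        if first < 0x80:
            i += 1            # 1-byte (ASCII) code point
        elif first < 0xE0:
            i += 2            # 2-byte sequence
        elif first < 0xF0:
            i += 3            # 3-byte sequence
        else:
            i += 4            # 4-byte sequence
        consumed_codepoints -= 1
    return utf8data[i:]

# _utf8_tail_after(u'a\u00e9b'.encode('utf8'), 2) == 'b'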
assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7' + assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'.encode('utf8') c_codecs.pypy_cjk_dec_free(decodebuf) def test_decodeex_hz_incomplete(): @@ -64,7 +64,7 @@ buf += c u = c_codecs.decodeex(decodebuf, buf, ignore_error = c_codecs.MBERR_TOOFEW) - assert u == output + assert u == output.encode('utf8') incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf) buf = buf[incompletepos:] assert buf == '' @@ -86,46 +86,47 @@ def test_decode_hz_ignore(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'ignore') - assert u == u'def\u5fcf' + assert u == u'def\u5fcf'.encode('utf8') def test_decode_hz_replace(): c = getcodec("hz") u = decode(c, 'def~{}abc', 'replace') - assert u == u'def\ufffd\u5fcf' + assert u == u'def\ufffd\u5fcf'.encode('utf8') def test_encode_hz(): c = getcodec("hz") - s = encode(c, u'foobar') + s = encode(c, u'foobar'.encode('utf8'), 6) assert s == 'foobar' and type(s) is str - s = encode(c, u'\u5f95\u6cef') + s = encode(c, u'\u5f95\u6cef'.encode('utf8'), 2) assert s == '~{abc}~}' def test_encode_hz_error(): # error c = getcodec("hz") - e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value + e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def'.encode('utf8'), 7).value assert e.start == 3 assert e.end == 4 assert e.reason == "illegal multibyte sequence" def test_encode_hz_ignore(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'ignore') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'ignore') assert s == 'abcdef' def test_encode_hz_replace(): c = getcodec("hz") - s = encode(c, u'abc\u1234def', 'replace') + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'replace') assert s == 'abc?def' def test_encode_jisx0208(): c = getcodec('iso2022_jp') - s = encode(c, u'\u83ca\u5730\u6642\u592b') + s = encode(c, u'\u83ca\u5730\u6642\u592b'.encode('utf8'), 4) assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str def test_encode_custom_error_handler_bytes(): + py.test.skip("needs revamping in py3k") c = getcodec("hz") def errorhandler(errors, enc, msg, t, startingpos, endingpos): - return None, '\xc3', endingpos - s = encode(c, u'abc\u1234def', 'foo', errorhandler) + return u'\xc3'.encode('utf8'), endingpos + s = encode(c, u'abc\u1234def'.encode('utf8'), 7, 'foo', errorhandler) assert '\xc3' in s diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py --- a/pypy/module/_multibytecodec/test/test_translation.py +++ b/pypy/module/_multibytecodec/test/test_translation.py @@ -1,6 +1,7 @@ from pypy.module._multibytecodec import c_codecs from rpython.translator.c.test import test_standalone from rpython.config.translationoption import get_combined_translation_config +from rpython.rlib import rutf8 class TestTranslation(test_standalone.StandaloneTests): @@ -13,7 +14,8 @@ codecname, string = argv[1], argv[2] c = c_codecs.getcodec(codecname) u = c_codecs.decode(c, string) - r = c_codecs.encode(c, u) + lgt, _ = rutf8.get_utf8_length_flag(u) + r = c_codecs.encode(c, u, lgt) print r return 0 # diff --git a/pypy/module/_pypyjson/interp_decoder.py b/pypy/module/_pypyjson/interp_decoder.py --- a/pypy/module/_pypyjson/interp_decoder.py +++ b/pypy/module/_pypyjson/interp_decoder.py @@ -1,7 +1,7 @@ import sys from rpython.rlib.rstring import StringBuilder from rpython.rlib.objectmodel import specialize, always_inline, r_dict -from rpython.rlib import rfloat, runicode +from rpython.rlib import rfloat, runicode, rutf8 from rpython.rtyper.lltypesystem import lltype, rffi 
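# Behavioural reference for the HZ tests above: CPython ships the same
# CJKCodecs-based 'hz' codec, so the byte strings asserted by
# test_encode_hz/test_decode_hz should be reproducible directly
# (illustration only; the expected values are taken from the tests above).
assert u'\u5f95\u6cef'.encode('hz') == '~{abc}~}'
assert '~{abc}'.decode('hz') == u'\u5f95\u6cef'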
from pypy.interpreter.error import oefmt from pypy.interpreter import unicodehelper @@ -19,29 +19,6 @@ return 0.0 return x * NEG_POW_10[exp] -def strslice2unicode_latin1(s, start, end): - """ - Convert s[start:end] to unicode. s is supposed to be an RPython string - encoded in latin-1, which means that the numeric value of each char is the - same as the corresponding unicode code point. - - Internally it's implemented at the level of low-level helpers, to avoid - the extra copy we would need if we take the actual slice first. - - No bound checking is done, use carefully. - """ - from rpython.rtyper.annlowlevel import llstr, hlunicode - from rpython.rtyper.lltypesystem.rstr import malloc, UNICODE - from rpython.rtyper.lltypesystem.lltype import cast_primitive, UniChar - length = end-start - ll_s = llstr(s) - ll_res = malloc(UNICODE, length) - ll_res.hash = 0 - for i in range(length): - ch = ll_s.chars[start+i] - ll_res.chars[i] = cast_primitive(UniChar, ch) - return hlunicode(ll_res) - def slice_eq(a, b): (ll_chars1, start1, length1, _) = a (ll_chars2, start2, length2, _) = b @@ -270,10 +247,11 @@ self.pos = i+1 return self.space.newdict() - d = {} + # XXX this should be improved to use an unwrapped dict + w_dict = self.space.newdict() while True: # parse a key: value - name = self.decode_key(i) + w_name = self.decode_key(i) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] if ch != ':': @@ -282,13 +260,13 @@ i = self.skip_whitespace(i) # w_value = self.decode_any(i) - d[name] = w_value + self.space.setitem(w_dict, w_name, w_value) i = self.skip_whitespace(self.pos) ch = self.ll_chars[i] i += 1 if ch == '}': self.pos = i - return self._create_dict(d) + return w_dict elif ch == ',': pass elif ch == '\0': @@ -297,10 +275,6 @@ self._raise("Unexpected '%s' when decoding object (char %d)", ch, i-1) - def _create_dict(self, d): - from pypy.objspace.std.dictmultiobject import from_unicode_key_dict - return from_unicode_key_dict(self.space, d) - def decode_string(self, i): start = i bits = 0 @@ -312,8 +286,7 @@ bits |= ord(ch) if ch == '"': self.pos = i - return self.space.newunicode( - self._create_string(start, i - 1, bits)) + return self._create_string(start, i - 1, bits) elif ch == '\\' or ch < '\x20': self.pos = i-1 return self.decode_string_escaped(start) @@ -322,12 +295,15 @@ if bits & 0x80: # the 8th bit is set, it's an utf8 string content_utf8 = self.getslice(start, end) - return unicodehelper.decode_utf8(self.space, content_utf8) + lgt, flag = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) + return self.space.newutf8(content_utf8, lgt, flag) else: # ascii only, fast path (ascii is a strict subset of # latin1, and we already checked that all the chars are < # 128) - return strslice2unicode_latin1(self.s, start, end) + return self.space.newutf8(self.getslice(start, end), + end - start, rutf8.FLAG_ASCII) def decode_string_escaped(self, start): i = self.pos @@ -340,9 +316,10 @@ i += 1 if ch == '"': content_utf8 = builder.build() - content_unicode = unicodehelper.decode_utf8(self.space, content_utf8) + lgt, f = unicodehelper.check_utf8_or_raise(self.space, + content_utf8) self.pos = i - return self.space.newunicode(content_unicode) + return self.space.newutf8(content_utf8, lgt, f) elif ch == '\\': i = self.decode_escape_sequence(i, builder) elif ch < '\x20': @@ -389,8 +366,7 @@ return # help the annotator to know that we'll never go beyond # this point # - uchr = runicode.code_to_unichr(val) # may be a surrogate pair again - utf8_ch = unicodehelper.encode_utf8(self.space, 
uchr) + utf8_ch = rutf8.unichr_as_utf8(val, allow_surrogates=True) builder.append(utf8_ch) return i @@ -404,7 +380,7 @@ return 0x10000 + (((highsurr - 0xd800) << 10) | (lowsurr - 0xdc00)) def decode_key(self, i): - """ returns an unwrapped unicode """ + """ returns a wrapped unicode """ from rpython.rlib.rarithmetic import intmask i = self.skip_whitespace(i) diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py --- a/pypy/module/_pypyjson/interp_encoder.py +++ b/pypy/module/_pypyjson/interp_encoder.py @@ -1,5 +1,5 @@ from rpython.rlib.rstring import StringBuilder -from rpython.rlib.runicode import str_decode_utf_8 +from rpython.rlib import rutf8 from pypy.interpreter import unicodehelper @@ -30,11 +30,8 @@ # the input is a string with only non-special ascii chars return w_string - eh = unicodehelper.decode_error_handler(space) - u = str_decode_utf_8( - s, len(s), None, final=True, errorhandler=eh, - allow_surrogates=True)[0] - sb = StringBuilder(len(u)) + unicodehelper.check_utf8_or_raise(space, s) + sb = StringBuilder(len(s)) sb.append_slice(s, 0, first) else: # We used to check if 'u' contains only safe characters, and return @@ -44,29 +41,31 @@ # a string (with the ascii encoding). This requires two passes # over the characters. So we may as well directly turn it into a # string here --- only one pass. - u = space.unicode_w(w_string) - sb = StringBuilder(len(u)) + s = space.utf8_w(w_string) + sb = StringBuilder(len(s)) first = 0 - for i in range(first, len(u)): - c = u[i] - if c <= u'~': - if c == u'"' or c == u'\\': + it = rutf8.Utf8StringIterator(s) + for i in range(first): + it.next() + for c in it: + if c <= ord('~'): + if c == ord('"') or c == ord('\\'): sb.append('\\') - elif c < u' ': - sb.append(ESCAPE_BEFORE_SPACE[ord(c)]) + elif c < ord(' '): + sb.append(ESCAPE_BEFORE_SPACE[c]) continue - sb.append(chr(ord(c))) + sb.append(chr(c)) else: - if c <= u'\uffff': + if c <= ord(u'\uffff'): sb.append('\\u') - sb.append(HEX[ord(c) >> 12]) - sb.append(HEX[(ord(c) >> 8) & 0x0f]) - sb.append(HEX[(ord(c) >> 4) & 0x0f]) - sb.append(HEX[ord(c) & 0x0f]) + sb.append(HEX[c >> 12]) + sb.append(HEX[(c >> 8) & 0x0f]) + sb.append(HEX[(c >> 4) & 0x0f]) + sb.append(HEX[c & 0x0f]) else: # surrogate pair - n = ord(c) - 0x10000 + n = c - 0x10000 s1 = 0xd800 | ((n >> 10) & 0x3ff) sb.append('\\ud') sb.append(HEX[(s1 >> 8) & 0x0f]) diff --git a/pypy/module/_pypyjson/test/test__pypyjson.py b/pypy/module/_pypyjson/test/test__pypyjson.py --- a/pypy/module/_pypyjson/test/test__pypyjson.py +++ b/pypy/module/_pypyjson/test/test__pypyjson.py @@ -10,10 +10,14 @@ assert dec.skip_whitespace(8) == len(s) dec.close() +class FakeSpace(object): + def newutf8(self, s, l, f): + return s + def test_decode_key(): s1 = "123" * 100 s = ' "%s" "%s" ' % (s1, s1) - dec = JSONDecoder('fake space', s) + dec = JSONDecoder(FakeSpace(), s) assert dec.pos == 0 x = dec.decode_key(0) assert x == s1 diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py --- a/pypy/module/_rawffi/alt/interp_funcptr.py +++ b/pypy/module/_rawffi/alt/interp_funcptr.py @@ -167,8 +167,8 @@ addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): - buf = rffi.unicode2wcharp(unicodeval) + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): + buf = rffi.utf82wcharp(utf8val, utf8len) self.w_func.to_free.append(rffi.cast(rffi.VOIDP, buf)) addr = rffi.cast(rffi.ULONG, buf) self.argchain.arg(addr) diff --git 
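# Standalone illustration (plain Python) of the surrogate-pair arithmetic
# used above: decode_escape_sequence() combines two \uXXXX escapes into a
# single code point, and the encoder splits a non-BMP code point back into
# a \uXXXX\uXXXX pair.
def _combine_surrogates(high, low):
    return 0x10000 + (((high - 0xD800) << 10) | (low - 0xDC00))

def _split_surrogates(code):
    n = code - 0x10000
    return 0xD800 | ((n >> 10) & 0x3FF), 0xDC00 | (n & 0x3FF)

assert _combine_surrogates(0xD83D, 0xDE00) == 0x1F600   # U+1F600
assert _split_surrogates(0x1F600) == (0xD83D, 0xDE00)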
a/pypy/module/_rawffi/alt/test/test_type_converter.py b/pypy/module/_rawffi/alt/test/test_type_converter.py --- a/pypy/module/_rawffi/alt/test/test_type_converter.py +++ b/pypy/module/_rawffi/alt/test/test_type_converter.py @@ -6,7 +6,7 @@ class DummyFromAppLevelConverter(FromAppLevelConverter): - def handle_all(self, w_ffitype, w_obj, val): + def handle_all(self, w_ffitype, w_obj, val, lgt=None): self.lastval = val handle_signed = handle_all @@ -120,8 +120,8 @@ def test_strings(self): # first, try automatic conversion from applevel self.check(app_types.char_p, self.space.newbytes('foo'), 'foo') - self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234') - self.check(app_types.unichar_p, self.space.wrap('foo'), u'foo') + self.check(app_types.unichar_p, self.space.wrap(u'foo\u1234'), u'foo\u1234'.encode('utf8')) + self.check(app_types.unichar_p, self.space.wrap('foo'), 'foo') # then, try to pass explicit pointers self.check(app_types.char_p, self.space.wrap(42), 42) self.check(app_types.unichar_p, self.space.wrap(42), 42) diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py --- a/pypy/module/_rawffi/alt/type_converter.py +++ b/pypy/module/_rawffi/alt/type_converter.py @@ -1,6 +1,6 @@ from rpython.rlib import libffi -from rpython.rlib import jit -from rpython.rlib.rarithmetic import r_uint +from rpython.rlib import jit, rutf8 +from rpython.rlib.rarithmetic import r_uint, intmask from pypy.interpreter.error import oefmt from pypy.module._rawffi.structure import W_StructureInstance, W_Structure from pypy.module._rawffi.alt.interp_ffitype import app_types @@ -85,8 +85,8 @@ return True elif w_ffitype.is_unichar_p() and (w_type is self.space.w_bytes or w_type is self.space.w_unicode): - unicodeval = self.space.unicode_w(w_obj) - self.handle_unichar_p(w_ffitype, w_obj, unicodeval) + utf8, lgt = self.space.utf8_len_w(w_obj) + self.handle_unichar_p(w_ffitype, w_obj, utf8, lgt) return True return False @@ -147,7 +147,7 @@ """ self.error(w_ffitype, w_obj) - def handle_unichar_p(self, w_ffitype, w_obj, unicodeval): + def handle_unichar_p(self, w_ffitype, w_obj, utf8val, utf8len): """ unicodeval: interp-level unicode """ @@ -228,7 +228,8 @@ return space.newbytes(chr(ucharval)) elif w_ffitype.is_unichar(): wcharval = self.get_unichar(w_ffitype) - return space.newunicode(unichr(wcharval)) + return space.newutf8(rutf8.unichr_as_utf8(wcharval), 1, + rutf8.get_flag_from_code(intmask(wcharval))) elif w_ffitype.is_double(): return self._float(w_ffitype) elif w_ffitype.is_singlefloat(): diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py --- a/pypy/module/_rawffi/interp_rawffi.py +++ b/pypy/module/_rawffi/interp_rawffi.py @@ -10,6 +10,7 @@ from rpython.rtyper.lltypesystem import lltype, rffi from rpython.rtyper.tool import rffi_platform from rpython.rlib.unroll import unrolling_iterable +from rpython.rlib import rutf8 from rpython.rlib.objectmodel import specialize import rpython.rlib.rposix as rposix @@ -416,13 +417,13 @@ val = s[0] push_func(add_arg, argdesc, val) elif letter == 'u': - s = space.unicode_w(w_arg) - if len(s) != 1: + s, lgt = space.utf8_len_w(w_arg) + if lgt != 1: raise oefmt(space.w_TypeError, "Expected unicode string of length one as wide " "character") - val = s[0] - push_func(add_arg, argdesc, val) + val = rutf8.codepoint_at_pos(s, 0) + push_func(add_arg, argdesc, rffi.cast(rffi.WCHAR_T, val)) else: for c in unroll_letters_for_numbers: if letter == c: diff --git 
a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py --- a/pypy/module/_sre/interp_sre.py +++ b/pypy/module/_sre/interp_sre.py @@ -7,7 +7,8 @@ from pypy.interpreter.error import OperationError, oefmt from rpython.rlib.rarithmetic import intmask from rpython.rlib import jit -from rpython.rlib.rstring import StringBuilder, UnicodeBuilder +from rpython.rlib.rstring import StringBuilder +from rpython.rlib.rutf8 import Utf8StringBuilder # ____________________________________________________________ # @@ -237,8 +238,8 @@ filter_is_callable = True else: if space.isinstance_w(w_ptemplate, space.w_unicode): - filter_as_unicode = space.unicode_w(w_ptemplate) - literal = u'\\' not in filter_as_unicode + filter_as_unicode = space.utf8_w(w_ptemplate) + literal = '\\' not in filter_as_unicode use_builder = ( space.isinstance_w(w_string, space.w_unicode) and literal) else: @@ -267,7 +268,7 @@ sublist_w = strbuilder = unicodebuilder = None if use_builder: if filter_as_unicode is not None: - unicodebuilder = UnicodeBuilder(ctx.end) + unicodebuilder = Utf8StringBuilder(ctx.end) else: assert filter_as_string is not None strbuilder = StringBuilder(ctx.end) @@ -335,7 +336,9 @@ return space.newbytes(strbuilder.build()), n else: assert unicodebuilder is not None - return space.newunicode(unicodebuilder.build()), n + return space.newutf8(unicodebuilder.build(), + unicodebuilder.get_length(), + unicodebuilder.get_flag()), n else: if space.isinstance_w(w_string, space.w_unicode): w_emptystr = space.newunicode(u'') diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py --- a/pypy/module/_ssl/interp_ssl.py +++ b/pypy/module/_ssl/interp_ssl.py @@ -1566,12 +1566,13 @@ cadata = space.bufferstr_w(w_cadata) else: ca_file_type = SSL_FILETYPE_PEM - try: - cadata = space.unicode_w(w_cadata).encode('ascii') - except UnicodeEncodeError: + w_uni = space.convert_arg_to_w_unicode(w_cadata) + if not w_uni.is_ascii(): raise oefmt(space.w_TypeError, "cadata should be a ASCII string or a " "bytes-like object") + cadata = space.utf8_w(w_uni) + if cafile is None and capath is None and cadata is None: raise oefmt(space.w_TypeError, "cafile and capath cannot be both omitted") diff --git a/pypy/objspace/std/dictmultiobject.py b/pypy/objspace/std/dictmultiobject.py --- a/pypy/objspace/std/dictmultiobject.py +++ b/pypy/objspace/std/dictmultiobject.py @@ -1257,12 +1257,6 @@ create_iterator_classes(UnicodeDictStrategy) -def from_unicode_key_dict(space, d): - strategy = space.fromcache(UnicodeDictStrategy) - storage = strategy.erase(d) - return W_DictObject(space, strategy, storage) - - class IntDictStrategy(AbstractTypedStrategy, DictStrategy): erase, unerase = rerased.new_erasing_pair("int") erase = staticmethod(erase) diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,23 +367,10 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) - def new_from_utf8(self, utf8s): - # XXX: kill me! - assert isinstance(utf8s, str) - length, flag = rutf8.check_utf8(utf8s, True) - return W_UnicodeObject(utf8s, length, flag) - def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
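# The cadata check above, restated as a tiny plain-Python sketch: a
# unicode cadata argument must be pure ASCII, otherwise a TypeError is
# raised (illustrative only; the real code now asks the wrapped unicode
# object via is_ascii() instead of round-tripping through encode()).
def _check_cadata_text(cadata_unicode):
    try:
        return cadata_unicode.encode('ascii')
    except UnicodeEncodeError:
        raise TypeError("cadata should be a ASCII string or a "
                        "bytes-like object")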
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py --- a/pypy/objspace/std/unicodeobject.py +++ b/pypy/objspace/std/unicodeobject.py @@ -64,6 +64,11 @@ # - malloced object, which means it has index, then # _index_storage.flags determines the kind + @staticmethod + def from_utf8builder(builder): + return W_UnicodeObject( + builder.build(), builder.get_length(), builder.get_flag()) + def __repr__(self): """representation for debugging purposes""" return "%s(%r)" % (self.__class__.__name__, self._utf8) @@ -344,57 +349,38 @@ return mod_format(space, w_values, self, do_unicode=True) def descr_swapcase(self, space): - selfvalue = self._utf8 - builder = StringBuilder(len(selfvalue)) - flag = self._get_flag() - i = 0 - while i < len(selfvalue): - ch = rutf8.codepoint_at_pos(selfvalue, i) - i = rutf8.next_codepoint_pos(selfvalue, i) + input = self._utf8 + builder = rutf8.Utf8StringBuilder(len(input)) + for ch in rutf8.Utf8StringIterator(input): if unicodedb.isupper(ch): ch = unicodedb.tolower(ch) elif unicodedb.islower(ch): ch = unicodedb.toupper(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder.append_code(ch) + return self.from_utf8builder(builder) def descr_title(self, space): if len(self._utf8) == 0: return self - utf8, flag = self.title_unicode(self._utf8) - return W_UnicodeObject(utf8, self._len(), flag) + return self.title_unicode(self._utf8) @jit.elidable def title_unicode(self, value): input = self._utf8 - builder = StringBuilder(len(input)) - i = 0 + builder = rutf8.Utf8StringBuilder(len(input)) previous_is_cased = False - flag = self._get_flag() - while i < len(input): - ch = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + for ch in rutf8.Utf8StringIterator(input): if not previous_is_cased: ch = unicodedb.totitle(ch) else: ch = unicodedb.tolower(ch) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) + builder.append_code(ch) previous_is_cased = unicodedb.iscased(ch) - return builder.build(), flag + return self.from_utf8builder(builder) def descr_translate(self, space, w_table): - input = self._utf8 - result = StringBuilder(len(input)) - result_length = 0 - flag = self._get_flag() - i = 0 - while i < len(input): - codepoint = rutf8.codepoint_at_pos(input, i) - i = rutf8.next_codepoint_pos(input, i) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for codepoint in rutf8.Utf8StringIterator(self._utf8): try: w_newval = space.getitem(w_table, space.newint(codepoint)) except OperationError as e: @@ -406,24 +392,19 @@ elif space.isinstance_w(w_newval, space.w_int): codepoint = space.int_w(w_newval) elif isinstance(w_newval, W_UnicodeObject): - result.append(w_newval._utf8) - flag = rutf8.combine_flags(flag, w_newval._get_flag()) - result_length += w_newval._length + builder.append_utf8( + w_newval._utf8, w_newval._length, w_newval._get_flag()) continue else: raise oefmt(space.w_TypeError, "character mapping must return integer, None " "or unicode") try: - if codepoint >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - 
rutf8.unichr_as_utf8_append(result, codepoint, - allow_surrogates=True) - result_length += 1 + builder.append_code(codepoint) except ValueError: raise oefmt(space.w_TypeError, "character mapping must be in range(0x110000)") - return W_UnicodeObject(result.build(), result_length, flag) + return self.from_utf8builder(builder) def descr_find(self, space, w_sub, w_start=None, w_end=None): w_result = self._unwrap_and_search(space, w_sub, w_start, w_end) @@ -517,12 +498,6 @@ def _join_return_one(self, space, w_obj): return space.is_w(space.type(w_obj), space.w_unicode) - def _join_check_item(self, space, w_obj): - if (space.isinstance_w(w_obj, space.w_bytes) or - space.isinstance_w(w_obj, space.w_unicode)): - return 0 - return 1 - def descr_formatter_parser(self, space): from pypy.objspace.std.newformat import unicode_template_formatter tformat = unicode_template_formatter(space, space.utf8_w(self)) @@ -534,16 +509,11 @@ return tformat.formatter_field_name_split() def descr_lower(self, space): - builder = StringBuilder(len(self._utf8)) - pos = 0 - flag = self._get_flag() - while pos < len(self._utf8): - lower = unicodedb.tolower(rutf8.codepoint_at_pos(self._utf8, pos)) - if lower >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - rutf8.unichr_as_utf8_append(builder, lower, allow_surrogates=True) - pos = rutf8.next_codepoint_pos(self._utf8, pos) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + lower = unicodedb.tolower(ch) + builder.append_code(lower) + return self.from_utf8builder(builder) def descr_isdecimal(self, space): return self._is_generic(space, '_isdecimal') @@ -657,13 +627,11 @@ flag = self._get_flag() for i in range(size): w_s = list_w[i] - check_item = self._join_check_item(space, w_s) - if check_item == 1: + if not (space.isinstance_w(w_s, space.w_bytes) or + space.isinstance_w(w_s, space.w_unicode)): raise oefmt(space.w_TypeError, - "sequence item %d: expected string, %T found", + "sequence item %d: expected string or unicode, %T found", i, w_s) - elif check_item == 2: - return self._join_autoconvert(space, list_w) # XXX Maybe the extra copy here is okay? 
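# Plain-Python (2.x) analogue of the Utf8StringIterator/Utf8StringBuilder
# pattern that descr_swapcase/descr_lower/descr_upper use above: walk the
# code points of a UTF-8 byte string and rebuild a new UTF-8 byte string.
# Illustrative only -- the real code uses unicodedb and never materialises
# a unicode object.
def _utf8_swapcase(utf8_bytes):
    chars = []
    for uchar in utf8_bytes.decode('utf-8'):
        if uchar.isupper():
            uchar = uchar.lower()
        elif uchar.islower():
            uchar = uchar.upper()
        chars.append(uchar)
    return u''.join(chars).encode('utf-8')

# _utf8_swapcase(u'aBc\u00e9'.encode('utf-8')) == u'AbC\u00c9'.encode('utf-8')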
It was basically going to # happen anyway, what with being placed into the builder w_u = self.convert_arg_to_w_unicode(space, w_s) @@ -711,18 +679,11 @@ return space.newlist(strs_w) def descr_upper(self, space): - value = self._utf8 - builder = StringBuilder(len(value)) - flag = self._get_flag() - i = 0 - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - uchar = unicodedb.toupper(uchar) - if uchar >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - i = rutf8.next_codepoint_pos(value, i) - rutf8.unichr_as_utf8_append(builder, uchar, allow_surrogates=True) - return W_UnicodeObject(builder.build(), self._length, flag) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + for ch in rutf8.Utf8StringIterator(self._utf8): + ch = unicodedb.toupper(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int) def descr_zfill(self, space, width): @@ -826,22 +787,15 @@ if len(value) == 0: return self._empty() - flag = self._get_flag() - builder = StringBuilder(len(value)) - uchar = rutf8.codepoint_at_pos(value, 0) - i = rutf8.next_codepoint_pos(value, 0) + builder = rutf8.Utf8StringBuilder(len(self._utf8)) + it = rutf8.Utf8StringIterator(self._utf8) + uchar = it.next() ch = unicodedb.toupper(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - while i < len(value): - uchar = rutf8.codepoint_at_pos(value, i) - i = rutf8.next_codepoint_pos(value, i) - ch = unicodedb.tolower(uchar) - rutf8.unichr_as_utf8_append(builder, ch, allow_surrogates=True) - if ch >= 0x80: - flag = rutf8.combine_flags(flag, rutf8.FLAG_REGULAR) - return W_UnicodeObject(builder.build(), self._len(), flag) + builder.append_code(ch) + for ch in it: + ch = unicodedb.tolower(ch) + builder.append_code(ch) + return self.from_utf8builder(builder) @unwrap_spec(width=int, w_fillchar=WrappedDefault(' ')) def descr_center(self, space, width, w_fillchar): diff --git a/requirements.txt b/requirements.txt --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ cffi>=1.4.0 -vmprof>=0.4.10 # required to parse log files in rvmprof tests + +# parse log files in rvmprof tests +vmprof>=0.4.10; 'x86' in platform.machine #skip arm, s390x # hypothesis is used for test generation on untranslated tests hypothesis diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -687,6 +687,11 @@ self._lgt += 1 unichr_as_utf8_append(self._s, code, True) + def append_utf8(self, utf8, length, flag): + self._flag = combine_flags(self._flag, flag) + self._lgt += length + self._s.append(utf8) + def build(self): return self._s.build() @@ -702,10 +707,12 @@ self._end = len(utf8s) self._pos = 0 - def done(self): - return self._pos == self._end + def __iter__(self): + return self def next(self): From pypy.commits at gmail.com Mon Nov 27 16:17:15 2017 From: pypy.commits at gmail.com (arigo) Date: Mon, 27 Nov 2017 13:17:15 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: On wide hosts, though, we should continue to run this test about lone sorrogates Message-ID: <5a1c80db.8b421c0a.5110b.e595@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93185:3b96420db19a Date: 2017-11-23 17:50 +0100 http://bitbucket.org/pypy/pypy/changeset/3b96420db19a/ Log: On wide hosts, though, we should continue to run this test about lone sorrogates diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- 
a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -155,7 +155,7 @@
             exp_flag = rutf8.FLAG_HAS_SURROGATES
             break
     lgt, flag = rutf8.get_utf8_length_flag(''.join([c.encode('utf8') for c in u]))
-    if exp_flag != rutf8.FLAG_HAS_SURROGATES:
+    if exp_flag != rutf8.FLAG_HAS_SURROGATES or sys.maxunicode > 0xffff:
         assert lgt == exp_lgt
         assert flag == exp_flag

From pypy.commits at gmail.com  Mon Nov 27 17:11:14 2017
From: pypy.commits at gmail.com (arigo)
Date: Mon, 27 Nov 2017 14:11:14 -0800 (PST)
Subject: [pypy-commit] pypy unicode-utf8: Fix test (?)
Message-ID: <5a1c8d82.480f1c0a.bb6fc.6702@mx.google.com>

Author: Armin Rigo
Branch: unicode-utf8
Changeset: r93188:a5ddce968cd5
Date: 2017-11-27 23:10 +0100
http://bitbucket.org/pypy/pypy/changeset/a5ddce968cd5/

Log: Fix test (?)

diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py
--- a/rpython/rlib/test/test_rutf8.py
+++ b/rpython/rlib/test/test_rutf8.py
@@ -199,7 +199,7 @@
     s = rutf8.Utf8StringBuilder()
     s.append_utf8("abc", 3, rutf8.FLAG_ASCII)
     assert s.get_flag() == rutf8.FLAG_ASCII
-    assert s.get_length() == 1
+    assert s.get_length() == 3
     assert s.build().decode("utf8") == u"abc"
     s.append_utf8(u"\u1234".encode("utf8"), 1, rutf8.FLAG_REGULAR)
+ """ + index_min = 0 + index_max = len(storage.contents) - 1 + while index_min < index_max: + index_middle = (index_min + index_max + 1) // 2 + base_bytepos = storage.contents[index_middle].baseindex + if bytepos < base_bytepos: + index_max = index_middle - 1 + else: + index_min = index_middle + bytepos1 = storage.contents[index_min].baseindex + result = index_min << 6 + while bytepos1 < bytepos: + bytepos1 = next_codepoint_pos(utf8, bytepos1) + result += 1 + return result + + def make_utf8_escape_function(pass_printable=False, quotes=False, prefix=None): @jit.elidable def unicode_escape(s): diff --git a/rpython/rlib/test/test_rutf8.py b/rpython/rlib/test/test_rutf8.py --- a/rpython/rlib/test/test_rutf8.py +++ b/rpython/rlib/test/test_rutf8.py @@ -128,6 +128,17 @@ assert (rutf8.codepoint_position_at_index(u.encode('utf8'), index, i) == len(u[:i].encode('utf8'))) + at given(strategies.text(average_size=140)) + at example(u'x' * 64 * 5) + at example(u'x' * (64 * 5 - 1)) +def test_codepoint_index_at_byte_position(u): + storage = rutf8.create_utf8_index_storage(u.encode('utf8'), len(u)) + for i in range(len(u) + 1): + bytepos = len(u[:i].encode('utf8')) + assert rutf8.codepoint_index_at_byte_position( + u.encode('utf8'), storage, bytepos) == i + + repr_func = rutf8.make_utf8_escape_function(prefix='u', pass_printable=False, quotes=True) From pypy.commits at gmail.com Tue Nov 28 08:35:26 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 28 Nov 2017 05:35:26 -0800 (PST) Subject: [pypy-commit] pypy default: cleanup, enable VSXXXCOMNTOOLS in win32 platform Message-ID: <5a1d661e.94571c0a.8e5e1.0bde@mx.google.com> Author: Matti Picus Branch: Changeset: r93189:1351a1844107 Date: 2017-11-28 15:33 +0200 http://bitbucket.org/pypy/pypy/changeset/1351a1844107/ Log: cleanup, enable VSXXXCOMNTOOLS in win32 platform diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, '--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -60,6 +52,10 @@ vcvars = None try: toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] + if x64flag: + vcvars = os.path.join(toolsdir, "vcvarsamd64.bat") + else: + vcvars = os.path.join(toolsdir, 'vsvars32.bat') except KeyError: # try to import from the registry, as done in setuptools # XXX works for 90 but is it generalizable? 
From pypy.commits at gmail.com Tue Nov 28 10:10:40 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:40 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: a branch in which to fix again the interaction of vmprof and stacklets; in particular, after fix-vmprof-stacklet-switch we no longer segfault, but we stop sampling after a switch Message-ID: <5a1d7c70.c97e1c0a.c2665.d3ec@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93190:b8f121ce766d Date: 2017-11-28 11:50 +0100 http://bitbucket.org/pypy/pypy/changeset/b8f121ce766d/ Log: a branch in which to fix again the interaction of vmprof and stacklets; in particular, after fix-vmprof-stacklet-switch we no longer segfault, but we stop sampling after a switch From pypy.commits at gmail.com Tue Nov 28 10:10:43 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:43 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: move {start, stop}_sampling inside the VMProf API, and start to write a fake class to test the correct usage of them Message-ID: <5a1d7c73.130d1c0a.3131c.833b@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93191:2f204b1c432c Date: 2017-11-28 15:38 +0100 http://bitbucket.org/pypy/pypy/changeset/2f204b1c432c/ Log: move {start,stop}_sampling inside the VMProf API, and start to write a fake class to test the correct usage of them diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -56,10 +56,7 @@ return None def stop_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_stop_sampling - fd = vmprof_stop_sampling() - return rffi.cast(lltype.Signed, fd) + return _get_vmprof().stop_sampling() def start_sampling(): - from rpython.rlib.rvmprof.cintf import vmprof_start_sampling - vmprof_start_sampling() + return _get_vmprof().start_sampling() diff --git a/rpython/rlib/rvmprof/rvmprof.py b/rpython/rlib/rvmprof/rvmprof.py --- a/rpython/rlib/rvmprof/rvmprof.py +++ b/rpython/rlib/rvmprof/rvmprof.py @@ -168,6 +168,21 @@ if self.cintf.vmprof_register_virtual_function(name, uid, 500000) < 0: raise VMProfError("vmprof buffers full! disk full or too slow") + def stop_sampling(self): + """ + Temporarily stop the sampling of stack frames. Signals are still + delivered, but are ignored. + """ + fd = self.cintf.vmprof_stop_sampling() + return rffi.cast(lltype.Signed, fd) + + def start_sampling(self): + """ + Undo the effect of stop_sampling + """ + self.cintf.vmprof_start_sampling() + + def vmprof_execute_code(name, get_code_fn, result_class=None, _hack_update_stack_untranslated=False): """Decorator to be used on the function that interprets a code object. 
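The two methods added above are meant to be called in strictly matched pairs: sampling is paused while the vmprof stack is in an inconsistent state (for example across a stacklet switch) and resumed afterwards. A minimal sketch of that contract, using only the module-level entry points shown above (the wrapper function itself is made up for illustration):

    # Illustration only: every stop_sampling() must be balanced by a
    # start_sampling(), even if the protected operation raises.
    from rpython.rlib import rvmprof

    def run_with_sampling_paused(func, *args):
        rvmprof.stop_sampling()
        try:
            return func(*args)
        finally:
            rvmprof.start_sampling()

To let tests verify that callers respect this pairing, the changeset also adds a fake implementation: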
diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/support.py @@ -0,0 +1,26 @@ + +class FakeVMProf(object): + + def __init__(self): + self._enabled = False + self._ignore_signals = 1 + + # --- VMProf official API --- + # add fake methods as needed by the tests + + def stop_sampling(self): + self._ignore_signals += 1 + + def start_sampling(self): + assert self._ignore_signals > 0, ('calling start_sampling() without ' + 'the corresponding stop_sampling()?') + self._ignore_signals -= 1 + + # --- FakeVMProf specific API --- + # this API is not part of rvmprof, but available only inside tests using + # fakervmprof + + @property + def is_sampling_enabled(self): + return self._ignore_signals == 0 + diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py new file mode 100644 --- /dev/null +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -0,0 +1,23 @@ +import pytest +from rpython.rlib.rvmprof.test.support import FakeVMProf + +class TestFakeVMProf(object): + + def test_sampling(self): + fake = FakeVMProf() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert fake.is_sampling_enabled + # + fake.stop_sampling() + fake.stop_sampling() + assert not fake.is_sampling_enabled + # + fake.start_sampling() + assert not fake.is_sampling_enabled + fake.start_sampling() + assert fake.is_sampling_enabled + # + pytest.raises(AssertionError, "fake.start_sampling()") + From pypy.commits at gmail.com Tue Nov 28 10:10:45 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:45 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: WIP: introduce a pytest fixture which allow us to easily use a global FakeVMProf instead of the real one Message-ID: <5a1d7c75.923e1c0a.75bd2.eef1@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93192:d5d42f493530 Date: 2017-11-28 15:47 +0100 http://bitbucket.org/pypy/pypy/changeset/d5d42f493530/ Log: WIP: introduce a pytest fixture which allow us to easily use a global FakeVMProf instead of the real one diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -1,3 +1,5 @@ +import pytest +from rpython.rlib import rvmprof class FakeVMProf(object): @@ -24,3 +26,12 @@ def is_sampling_enabled(self): return self._ignore_signals == 0 + + at pytest.fixture +def fakevmprof(request, monkeypatch): + fake = FakeVMProf() + def _get_fake_vmprof(): + return fake + monkeypatch.setattr(rvmprof.rvmprof, '_get_vmprof', _get_fake_vmprof) + return fake + diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -1,5 +1,6 @@ import pytest -from rpython.rlib.rvmprof.test.support import FakeVMProf +from rpython.rlib import rvmprof +from rpython.rlib.rvmprof.test.support import FakeVMProf, fakevmprof class TestFakeVMProf(object): @@ -21,3 +22,17 @@ # pytest.raises(AssertionError, "fake.start_sampling()") + + +class TestFixture(object): + + def test_fixture(self, fakevmprof): + assert isinstance(fakevmprof, FakeVMProf) + assert rvmprof.rvmprof._get_vmprof() is fakevmprof + # + # tweak sampling using the "real" API, and check that we actually used + # the fake + rvmprof.start_sampling() + assert 
fakevmprof.is_sampling_enabled + rvmprof.stop_sampling() + assert not fakevmprof.is_sampling_enabled From pypy.commits at gmail.com Tue Nov 28 10:10:47 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:47 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: 1) we can't monkey-patch _get_vmprof because it's imported in two places; instead it's easier to monkey-patch the singleton it returns; 2) move vmprof_{start,stop}_sampling to the proper cintf namespace, so that they can no longer be called directly Message-ID: <5a1d7c77.e28edf0a.a85e7.d837@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93193:b85210ca9c20 Date: 2017-11-28 15:54 +0100 http://bitbucket.org/pypy/pypy/changeset/b85210ca9c20/ Log: 1) we can't monkey-patch _get_vmprof because it's imported in two places; instead it's easier to monkey-patch the singleton it returns; 2) move vmprof_{start,stop}_sampling to the proper cintf namespace, so that they can no longer be called directly diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -122,32 +122,16 @@ lltype.Signed, compilation_info=eci, _nowrapper=True) + vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], + rffi.INT, compilation_info=eci, + _nowrapper=True) + vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], + lltype.Void, compilation_info=eci, + _nowrapper=True) + return CInterface(locals()) -# this is always present, but compiles to no-op if RPYTHON_VMPROF is not -# defined (i.e. if we don't actually use vmprof in the generated C) -auto_eci = ExternalCompilationInfo(post_include_bits=[""" -#ifndef RPYTHON_VMPROF -# define vmprof_stop_sampling() (-1) -# define vmprof_start_sampling() ((void)0) -#endif -"""]) - -if get_translation_config() is None: - # tests need the full eci here - _eci = global_eci -else: - _eci = auto_eci - -vmprof_stop_sampling = rffi.llexternal("vmprof_stop_sampling", [], - rffi.INT, compilation_info=_eci, - _nowrapper=True) -vmprof_start_sampling = rffi.llexternal("vmprof_start_sampling", [], - lltype.Void, compilation_info=_eci, - _nowrapper=True) - - class CInterface(object): def __init__(self, namespace): for k, v in namespace.iteritems(): diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -30,8 +30,6 @@ @pytest.fixture def fakevmprof(request, monkeypatch): fake = FakeVMProf() - def _get_fake_vmprof(): - return fake - monkeypatch.setattr(rvmprof.rvmprof, '_get_vmprof', _get_fake_vmprof) + monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -28,7 +28,7 @@ def test_fixture(self, fakevmprof): assert isinstance(fakevmprof, FakeVMProf) - assert rvmprof.rvmprof._get_vmprof() is fakevmprof + assert rvmprof._get_vmprof() is fakevmprof # # tweak sampling using the "real" API, and check that we actually used # the fake From pypy.commits at gmail.com Tue Nov 28 10:10:49 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 07:10:49 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: add a pytest finalizer to check that we called {start, stop}_sampling an even amount of times. 
I have no idea how to write a test for it, though :( Message-ID: <5a1d7c79.8a5b1c0a.4a864.12f5@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93194:ff5fd2e1f430 Date: 2017-11-28 16:09 +0100 http://bitbucket.org/pypy/pypy/changeset/ff5fd2e1f430/ Log: add a pytest finalizer to check that we called {start,stop}_sampling an even amount of times. I have no idea how to write a test for it, though :( diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -31,5 +31,12 @@ def fakevmprof(request, monkeypatch): fake = FakeVMProf() monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) + # + def check_status(): + if fake._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % fake._ignore_signals + request.addfinalizer(check_status) return fake - From pypy.commits at gmail.com Tue Nov 28 11:50:08 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:08 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: move the vmprof API needed by rstacklet from cintf to rvmprof/__init__.py, to integrate better with fakevmprof Message-ID: <5a1d93c0.05d31c0a.e821a.b73b@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93195:2907f533041c Date: 2017-11-28 16:48 +0100 http://bitbucket.org/pypy/pypy/changeset/2907f533041c/ Log: move the vmprof API needed by rstacklet from cintf to rvmprof/__init__.py, to integrate better with fakevmprof diff --git a/rpython/rlib/rstacklet.py b/rpython/rlib/rstacklet.py --- a/rpython/rlib/rstacklet.py +++ b/rpython/rlib/rstacklet.py @@ -3,7 +3,7 @@ from rpython.rlib import jit from rpython.rlib.objectmodel import fetch_translated_config from rpython.rtyper.lltypesystem import lltype, llmemory -from rpython.rlib.rvmprof import cintf +from rpython.rlib import rvmprof DEBUG = False @@ -25,12 +25,12 @@ def new(self, callback, arg=llmemory.NULL): if DEBUG: callback = _debug_wrapper(callback) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: - cintf.empty_rvmprof_stack() + rvmprof.empty_stack() h = self._gcrootfinder.new(self, callback, arg) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h @@ -40,11 +40,11 @@ def switch(self, stacklet): if DEBUG: debug.remove(stacklet) - x = cintf.save_rvmprof_stack() + x = rvmprof.save_stack() try: h = self._gcrootfinder.switch(stacklet) finally: - cintf.restore_rvmprof_stack(x) + rvmprof.restore_stack(x) if DEBUG: debug.add(h) return h diff --git a/rpython/rlib/rvmprof/__init__.py b/rpython/rlib/rvmprof/__init__.py --- a/rpython/rlib/rvmprof/__init__.py +++ b/rpython/rlib/rvmprof/__init__.py @@ -60,3 +60,23 @@ def start_sampling(): return _get_vmprof().start_sampling() + +# ---------------- +# stacklet support +# ---------------- +# +# Ideally, vmprof_tl_stack, VMPROFSTACK etc. should be part of "self.cintf": +# not sure why they are a global. Eventually, we should probably fix all this +# mess. 
+from rpython.rlib.rvmprof.cintf import vmprof_tl_stack, VMPROFSTACK + +def save_stack(): + stop_sampling() + return vmprof_tl_stack.get_or_make_raw() + +def empty_stack(): + vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) + +def restore_stack(x): + vmprof_tl_stack.setraw(x) + start_sampling() diff --git a/rpython/rlib/rvmprof/cintf.py b/rpython/rlib/rvmprof/cintf.py --- a/rpython/rlib/rvmprof/cintf.py +++ b/rpython/rlib/rvmprof/cintf.py @@ -216,20 +216,6 @@ leave_code(s) # -# stacklet support - -def save_rvmprof_stack(): - vmprof_stop_sampling() - return vmprof_tl_stack.get_or_make_raw() - -def empty_rvmprof_stack(): - vmprof_tl_stack.setraw(lltype.nullptr(VMPROFSTACK)) - -def restore_rvmprof_stack(x): - vmprof_tl_stack.setraw(x) - vmprof_start_sampling() - -# # traceback support def get_rvmprof_stack(): From pypy.commits at gmail.com Tue Nov 28 11:50:10 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:10 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: make check_status a real method, so that it can be tested and used also without the fixture Message-ID: <5a1d93c2.169a1c0a.63814.2120@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93196:d3c13697bffe Date: 2017-11-28 17:00 +0100 http://bitbucket.org/pypy/pypy/changeset/d3c13697bffe/ Log: make check_status a real method, so that it can be tested and used also without the fixture diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,4 +1,5 @@ import os +from rpython.rlib.rvmprof.test.support import fakevmprof from pypy.module._continuation.test.support import BaseAppTest diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -26,17 +26,20 @@ def is_sampling_enabled(self): return self._ignore_signals == 0 + def check_status(self): + """ + To be called during test teardown + """ + if self._ignore_signals != 1: + msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' + 'got %d. This probably means that you called ' + '{start,stop}_sampling() a wrong number of times') + raise ValueError, msg % self._ignore_signals + @pytest.fixture def fakevmprof(request, monkeypatch): fake = FakeVMProf() monkeypatch.setattr(rvmprof.rvmprof, '_vmprof_instance', fake) - # - def check_status(): - if fake._ignore_signals != 1: - msg = ('Invalid value for fakevmprof._ignore_signals: expected 1, ' - 'got %d. 
This probably means that you called ' - '{start,stop}_sampling() a wrong number of times') - raise ValueError, msg % fake._ignore_signals - request.addfinalizer(check_status) + request.addfinalizer(fake.check_status) return fake diff --git a/rpython/rlib/rvmprof/test/test_support.py b/rpython/rlib/rvmprof/test/test_support.py --- a/rpython/rlib/rvmprof/test/test_support.py +++ b/rpython/rlib/rvmprof/test/test_support.py @@ -22,6 +22,10 @@ # pytest.raises(AssertionError, "fake.start_sampling()") + def test_check_status(self): + fake = FakeVMProf() + fake.stop_sampling() + pytest.raises(ValueError, "fake.check_status()") class TestFixture(object): From pypy.commits at gmail.com Tue Nov 28 11:50:16 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:16 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: fix test_sampling_inside_callback by restarting sampling inside Message-ID: <5a1d93c8.0e97df0a.585f3.c1e3@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93199:44449d69030b Date: 2017-11-28 17:49 +0100 http://bitbucket.org/pypy/pypy/changeset/44449d69030b/ Log: fix test_sampling_inside_callback by restarting sampling inside new_stacklet_callback, after it was stopped inside switch(). This fix is a bit obscure because one part of start/stop is inside rstacklet.switch, while the other is in interp_continuation. OTOH, if we do the fix inside rstacklet we need to replicate it for every GcRootFinder, which is also obscure and a nightmare to test. Not sure which is the least ugly :( diff --git a/pypy/module/_continuation/interp_continuation.py b/pypy/module/_continuation/interp_continuation.py --- a/pypy/module/_continuation/interp_continuation.py +++ b/pypy/module/_continuation/interp_continuation.py @@ -1,5 +1,6 @@ from rpython.rlib.rstacklet import StackletThread from rpython.rlib import jit +from rpython.rlib import rvmprof from pypy.interpreter.error import OperationError, get_cleared_operation_error from pypy.interpreter.executioncontext import ExecutionContext from pypy.interpreter.baseobjspace import W_Root @@ -222,12 +223,15 @@ self.h = h global_state.clear() try: + rvmprof.start_sampling() frame = self.bottomframe w_result = frame.execute_frame() except Exception as e: global_state.propagate_exception = e else: global_state.w_value = w_result + finally: + rvmprof.stop_sampling() self.sthread.ec.topframeref = jit.vref_None global_state.origin = self global_state.destination = self From pypy.commits at gmail.com Tue Nov 28 11:50:14 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:14 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: rename the fixture to app_fakevmprof and use it on all tests: this way, it automatically checks that {start, stop}_sampling has been called an even amount of times Message-ID: <5a1d93c6.42da1c0a.cfa52.fe60@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93198:a48521eb6944 Date: 2017-11-28 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/a48521eb6944/ Log: rename the fixture to app_fakevmprof and use it on all tests: this way, it automatically checks that {start,stop}_sampling has been called an even amount of times diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -4,7 +4,7 @@ import os +from rpython.rlib.rvmprof.test.support import fakevmprof from pypy.interpreter.gateway import interp2app from 
pypy.module._continuation.test.support import BaseAppTest - + at pytest.mark.usefixtures('app_fakevmprof') class AppTestStacklet(BaseAppTest): def setup_class(cls): BaseAppTest.setup_class.im_func(cls) @@ -41,6 +41,29 @@ # make sure that "self.stack" does not pass the self cls.w_stack = staticmethod(cls.w_stack.im_func) + + @pytest.fixture + def app_fakevmprof(self, fakevmprof): + """ + This is automaticaly re-initialized for every method: thanks to + fakevmprof's finalizer, it checks that we called {start,stop}_sampling + the in pairs + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_new_empty(self): from _continuation import continulet # @@ -774,29 +797,6 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) - - at pytest.mark.usefixtures('init_method') -class AppTestVMProf(BaseAppTest): - - @pytest.fixture - def init_method(self, fakevmprof): - """ - This is automaticaly re-initialized for every method - """ - w = self.space.wrap - i2a = interp2app - def is_sampling_enabled(space): - return space.wrap(fakevmprof.is_sampling_enabled) - self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) - # - def start_sampling(space): - fakevmprof.start_sampling() - self.w_start_sampling = w(i2a(start_sampling)) - # - def stop_sampling(space): - fakevmprof.stop_sampling() - self.w_stop_sampling = w(i2a(stop_sampling)) - def test_sampling_inside_callback(self): from _continuation import continulet # @@ -804,10 +804,12 @@ assert self.is_sampling_enabled() return 42 # - self.start_sampling() - assert self.is_sampling_enabled() - c = continulet(my_callback) - res = c.switch() - assert res == 42 - assert self.is_sampling_enabled() - self.stop_sampling() + try: + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + finally: + self.stop_sampling() From pypy.commits at gmail.com Tue Nov 28 11:50:12 2017 From: pypy.commits at gmail.com (antocuni) Date: Tue, 28 Nov 2017 08:50:12 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: WIP: add a failing test which shows that we are not taking samples inside callbacks Message-ID: <5a1d93c4.8f9ddf0a.f99d6.4ed7@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93197:85cbd648e7dd Date: 2017-11-28 17:24 +0100 http://bitbucket.org/pypy/pypy/changeset/85cbd648e7dd/ Log: WIP: add a failing test which shows that we are not taking samples inside callbacks diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -1,5 +1,7 @@ +import pytest import os from rpython.rlib.rvmprof.test.support import fakevmprof +from pypy.interpreter.gateway import interp2app from pypy.module._continuation.test.support import BaseAppTest @@ -771,3 +773,41 @@ continulet.switch(c1, to=c2) raises(error, continulet.switch, c1, to=c2) + + + at pytest.mark.usefixtures('init_method') +class AppTestVMProf(BaseAppTest): + + @pytest.fixture + def init_method(self, fakevmprof): + """ + This is 
automaticaly re-initialized for every method + """ + w = self.space.wrap + i2a = interp2app + def is_sampling_enabled(space): + return space.wrap(fakevmprof.is_sampling_enabled) + self.w_is_sampling_enabled = w(i2a(is_sampling_enabled)) + # + def start_sampling(space): + fakevmprof.start_sampling() + self.w_start_sampling = w(i2a(start_sampling)) + # + def stop_sampling(space): + fakevmprof.stop_sampling() + self.w_stop_sampling = w(i2a(stop_sampling)) + + def test_sampling_inside_callback(self): + from _continuation import continulet + # + def my_callback(c1): + assert self.is_sampling_enabled() + return 42 + # + self.start_sampling() + assert self.is_sampling_enabled() + c = continulet(my_callback) + res = c.switch() + assert res == 42 + assert self.is_sampling_enabled() + self.stop_sampling() diff --git a/rpython/rlib/rvmprof/test/support.py b/rpython/rlib/rvmprof/test/support.py --- a/rpython/rlib/rvmprof/test/support.py +++ b/rpython/rlib/rvmprof/test/support.py @@ -20,7 +20,7 @@ # --- FakeVMProf specific API --- # this API is not part of rvmprof, but available only inside tests using - # fakervmprof + # fakevmprof @property def is_sampling_enabled(self): From pypy.commits at gmail.com Tue Nov 28 12:09:00 2017 From: pypy.commits at gmail.com (mattip) Date: Tue, 28 Nov 2017 09:09:00 -0800 (PST) Subject: [pypy-commit] pypy default: win32 fixes, even msdb does not know if it should be vcvarsXX.bat or vsvarsXX.bat Message-ID: <5a1d982c.476b1c0a.0877.d3fc@mx.google.com> Author: Matti Picus Branch: Changeset: r93200:d402ee2877e6 Date: 2017-11-28 19:11 +0200 http://bitbucket.org/pypy/pypy/changeset/d402ee2877e6/ Log: win32 fixes, even msdb does not know if it should be vcvarsXX.bat or vsvarsXX.bat diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -52,10 +52,6 @@ vcvars = None try: toolsdir = os.environ['VS%sCOMNTOOLS' % vsver] - if x64flag: - vcvars = os.path.join(toolsdir, "vcvarsamd64.bat") - else: - vcvars = os.path.join(toolsdir, 'vsvars32.bat') except KeyError: # try to import from the registry, as done in setuptools # XXX works for 90 but is it generalizable? 
@@ -69,7 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vsvars32.bat') import subprocess try: From pypy.commits at gmail.com Tue Nov 28 14:25:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:21 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: Remove newunicode() and unicode_w() again Message-ID: <5a1db821.2a9ddf0a.3f404.a46b@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93201:26f1724ee623 Date: 2017-11-28 19:21 +0000 http://bitbucket.org/pypy/pypy/changeset/26f1724ee623/ Log: Remove newunicode() and unicode_w() again diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py --- a/pypy/interpreter/baseobjspace.py +++ b/pypy/interpreter/baseobjspace.py @@ -1760,10 +1760,6 @@ def utf8_w(self, w_obj): return w_obj.utf8_w(self) - def unicode_w(self, w_obj): - # XXX: kill me! - return w_obj.utf8_w(self).decode('utf-8') - def convert_to_w_unicode(self, w_obj): return w_obj.convert_to_w_unicode(self) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -27,7 +27,8 @@ w_newline=space.newtext(mode)) lines = [] while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.utf8_w(w_line).decode('utf-8') if limit > 0: assert len(line) <= limit if line: diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -377,13 +377,6 @@ assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding - def newunicode(self, unistr): - # XXX: kill me! 
- assert isinstance(unistr, unicode) - utf8s = unistr.encode("utf-8") - length, flag = rutf8.check_utf8(utf8s, True) - return self.newutf8(utf8s, length, flag) - def type(self, w_obj): jit.promote(w_obj.__class__) return w_obj.getclass(self) From pypy.commits at gmail.com Tue Nov 28 14:25:23 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:23 -0800 (PST) Subject: [pypy-commit] pypy utf8-io: close branch before merging Message-ID: <5a1db823.90a9df0a.93d36.0bb2@mx.google.com> Author: Ronan Lamy Branch: utf8-io Changeset: r93202:a4e5720003bb Date: 2017-11-28 19:22 +0000 http://bitbucket.org/pypy/pypy/changeset/a4e5720003bb/ Log: close branch before merging From pypy.commits at gmail.com Tue Nov 28 14:25:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:25:25 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: Merge branch 'utf8-io': fix the _io module Message-ID: <5a1db825.3bb0df0a.d4ca0.74bc@mx.google.com> Author: Ronan Lamy Branch: unicode-utf8 Changeset: r93203:290c2d5ff0bb Date: 2017-11-28 19:23 +0000 http://bitbucket.org/pypy/pypy/changeset/290c2d5ff0bb/ Log: Merge branch 'utf8-io': fix the _io module diff --git a/pypy/module/_io/interp_stringio.py b/pypy/module/_io/interp_stringio.py --- a/pypy/module/_io/interp_stringio.py +++ b/pypy/module/_io/interp_stringio.py @@ -17,20 +17,20 @@ if len(self.data) > newlength: self.data = self.data[:newlength] if len(self.data) < newlength: - self.data.extend([u'\0'] * (newlength - len(self.data))) + self.data.extend(['\0'] * (newlength - len(self.data))) def read(self, size): start = self.pos available = len(self.data) - start if available <= 0: - return u'' + return '' if size >= 0 and size <= available: end = start + size else: end = len(self.data) assert 0 <= start <= end self.pos = end - return u''.join(self.data[start:end]) + return ''.join(self.data[start:end]) def _convert_limit(self, limit): if limit < 0 or limit > len(self.data) - self.pos: @@ -58,7 +58,7 @@ else: break self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def readline(self, marker, limit): @@ -79,7 +79,7 @@ if not found: pos = end self.pos = pos - result = u''.join(self.data[start:pos]) + result = ''.join(self.data[start:pos]) return result def write(self, string): @@ -99,7 +99,7 @@ self.resize(size) def getvalue(self): - return u''.join(self.data) + return ''.join(self.data) class W_StringIO(W_TextIOBase): @@ -118,10 +118,10 @@ if space.is_w(w_newline, space.w_None): newline = None else: - newline = space.unicode_w(w_newline) + newline = space.utf8_w(w_newline) - if (newline is not None and newline != u"" and newline != u"\n" and - newline != u"\r" and newline != u"\r\n"): + if (newline is not None and newline != "" and newline != "\n" and + newline != "\r" and newline != "\r\n"): # Not using oefmt() because I don't know how to use it # with unicode raise OperationError(space.w_ValueError, @@ -131,9 +131,9 @@ ) if newline is not None: self.readnl = newline - self.readuniversal = newline is None or newline == u"" + self.readuniversal = newline is None or newline == "" self.readtranslate = newline is None - if newline and newline[0] == u"\r": + if newline and newline[0] == "\r": self.writenl = newline if self.readuniversal: self.w_decoder = space.call_function( @@ -152,7 +152,7 @@ if self.readnl is None: w_readnl = space.w_None else: - w_readnl = space.str(space.newunicode(self.readnl)) # YYY + w_readnl = space.str(space.new_from_utf8(self.readnl)) # YYY return 
space.newtuple([ w_initialval, w_readnl, space.newint(self.buf.pos), w_dict ]) @@ -179,7 +179,7 @@ # because the string value in the state tuple has already been # translated once by __init__. So we do not take any chance and replace # object's buffer completely - initval = space.unicode_w(w_initval) + initval = space.utf8_w(w_initval) pos = space.getindex_w(w_pos, space.w_TypeError) if pos < 0: raise oefmt(space.w_ValueError, @@ -215,8 +215,8 @@ if self.writenl: w_decoded = space.call_method( w_decoded, "replace", - space.newtext("\n"), space.newunicode(self.writenl)) - string = space.unicode_w(w_decoded) + space.newtext("\n"), space.new_from_utf8(self.writenl)) + string = space.utf8_w(w_decoded) if string: self.buf.write(string) @@ -225,7 +225,7 @@ def read_w(self, space, w_size=None): self._check_closed(space) size = convert_size(space, w_size) - return space.newunicode(self.buf.read(size)) + return space.new_from_utf8(self.buf.read(size)) def readline_w(self, space, w_limit=None): self._check_closed(space) @@ -235,11 +235,11 @@ else: if self.readtranslate: # Newlines are already translated, only search for \n - newline = u'\n' + newline = '\n' else: newline = self.readnl result = self.buf.readline(newline, limit) - return space.newunicode(result) + return space.new_from_utf8(result) @unwrap_spec(pos=int, mode=int) @@ -276,7 +276,7 @@ def getvalue_w(self, space): self._check_closed(space) - return space.newunicode(self.buf.getvalue()) + return space.new_from_utf8(self.buf.getvalue()) def readable_w(self, space): self._check_closed(space) diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -11,7 +11,8 @@ from rpython.rlib.rarithmetic import intmask, r_uint, r_ulonglong from rpython.rlib.rbigint import rbigint from rpython.rlib.rstring import StringBuilder -from rpython.rlib.rutf8 import FLAG_ASCII, check_utf8 +from rpython.rlib.rutf8 import ( + FLAG_ASCII, check_utf8, next_codepoint_pos, codepoints_in_utf8) STATE_ZERO, STATE_OK, STATE_DETACHED = range(3) @@ -303,7 +304,7 @@ def set(self, space, w_decoded): check_decoded(space, w_decoded) - self.text = space.unicode_w(w_decoded) + self.text = space.utf8_w(w_decoded) self.pos = 0 def reset(self): @@ -312,7 +313,7 @@ def get_chars(self, size): if self.text is None: - return u"" + return "" available = len(self.text) - self.pos if size < 0 or size > available: @@ -341,7 +342,7 @@ if self.exhausted(): raise StopIteration ch = self.text[self.pos] - self.pos += 1 + self.pos = next_codepoint_pos(self.text, self.pos) return ch def peek_char(self): @@ -362,16 +363,16 @@ ch = self.next_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': return True - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: ch = self.peek_char() except StopIteration: return False - if ch == u'\n': + if ch == '\n': self.next_char() return True else: @@ -388,11 +389,11 @@ except StopIteration: return False scanned += 1 - if ch == u'\r': + if ch == '\r': if scanned >= limit: return False try: - if self.peek_char() == u'\n': + if self.peek_char() == '\n': self.next_char() return True except StopIteration: @@ -420,6 +421,7 @@ if not space.isinstance_w(w_decoded, space.w_unicode): msg = "decoder should return a string result, not '%T'" raise oefmt(space.w_TypeError, msg, w_decoded) + return w_decoded class W_TextIOWrapper(W_TextIOBase): @@ -705,11 +707,11 @@ else: if self.readtranslate: # Newlines are already translated, 
only search for \n - newline = u'\n' + newline = '\n' else: # Non-universal mode. newline = self.readnl - if newline == u'\r\n': + if newline == '\r\n': return self.decoded.find_crlf(limit) else: return self.decoded.find_char(newline[0], limit) @@ -945,13 +947,14 @@ w_decoded = space.call_method(self.w_decoder, "decode", w_chunk, space.newbool(bool(cookie.need_eof))) - self.decoded.set(space, w_decoded) + w_decoded = check_decoded(space, w_decoded) # Skip chars_to_skip of the decoded characters - if len(self.decoded.text) < cookie.chars_to_skip: + if space.len_w(w_decoded) < cookie.chars_to_skip: raise oefmt(space.w_IOError, "can't restore logical file position") - self.decoded.pos = cookie.chars_to_skip + self.decoded.set(space, w_decoded) + self.decoded.pos = w_decoded._index_to_byte(cookie.chars_to_skip) else: self.snapshot = PositionSnapshot(cookie.dec_flags, "") @@ -963,10 +966,8 @@ def tell_w(self, space): self._check_closed(space) - if not self.seekable: raise oefmt(space.w_IOError, "underlying stream is not seekable") - if not self.telling: raise oefmt(space.w_IOError, "telling position disabled by next() call") @@ -992,7 +993,8 @@ # We haven't moved from the snapshot point. return space.newlong_from_rbigint(cookie.pack()) - chars_to_skip = self.decoded.pos + chars_to_skip = codepoints_in_utf8( + self.decoded.text, end=self.decoded.pos) # Starting from the snapshot position, we will walk the decoder # forward until it gives us enough decoded characters. @@ -1036,14 +1038,14 @@ # We didn't get enough decoded data; signal EOF to get more. w_decoded = space.call_method(self.w_decoder, "decode", space.newbytes(""), - space.newint(1)) # final=1 + space.newint(1)) # final=1 check_decoded(space, w_decoded) - chars_decoded += len(space.unicode_w(w_decoded)) + chars_decoded += space.len_w(w_decoded) cookie.need_eof = 1 if chars_decoded < chars_to_skip: raise oefmt(space.w_IOError, - "can't reconstruct logical file position") + "can't reconstruct logical file position") finally: space.call_method(self.w_decoder, "setstate", w_saved_state) diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -27,7 +27,8 @@ w_newline=space.newtext(mode)) lines = [] while True: - line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) + w_line = w_textio.readline_w(space, space.newint(limit)) + line = space.utf8_w(w_line).decode('utf-8') if limit > 0: assert len(line) <= limit if line: @@ -38,31 +39,27 @@ @given(st.text()) def test_read_buffer(text): - buf = DecodeBuffer(text) - assert buf.get_chars(-1) == text + buf = DecodeBuffer(text.encode('utf-8')) + assert buf.get_chars(-1) == text.encode('utf-8') assert buf.exhausted() @given(st.text(), st.lists(st.integers(min_value=0))) def test_readn_buffer(text, sizes): - buf = DecodeBuffer(text) + buf = DecodeBuffer(text.encode('utf-8')) strings = [] for n in sizes: s = buf.get_chars(n) if not buf.exhausted(): - assert len(s) == n + assert len(s.decode('utf-8')) == n else: - assert len(s) <= n + assert len(s.decode('utf-8')) <= n strings.append(s) - assert ''.join(strings) == text[:sum(sizes)] + assert ''.join(strings) == text[:sum(sizes)].encode('utf-8') @given(st.text()) def test_next_char(text): - buf = DecodeBuffer(text) - chars = [] - try: - while True: - chars.append(buf.next_char()) - except StopIteration: - pass + buf = DecodeBuffer(text.encode('utf-8')) + for i in range(len(text)): + ch = 
buf.next_char() + assert ch == text[i].encode('utf-8')[0] assert buf.exhausted() - assert u''.join(chars) == text diff --git a/pypy/module/_io/test/test_ztranslation.py b/pypy/module/_io/test/test_ztranslation.py deleted file mode 100644 --- a/pypy/module/_io/test/test_ztranslation.py +++ /dev/null @@ -1,4 +0,0 @@ -from pypy.objspace.fake.checkmodule import checkmodule - -def test_checkmodule(): - checkmodule('_io') diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py --- a/pypy/objspace/fake/objspace.py +++ b/pypy/objspace/fake/objspace.py @@ -212,6 +212,12 @@ def newutf8(self, x, l, f): return w_some_obj() + def new_from_utf8(self, a): + return w_some_obj() + + def newunicode(self, a): + return w_some_obj() + newtext = newbytes newtext_or_none = newbytes newfilename = newbytes diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py --- a/pypy/objspace/std/objspace.py +++ b/pypy/objspace/std/objspace.py @@ -367,6 +367,12 @@ assert isinstance(utf8s, str) return W_UnicodeObject(utf8s, length, flag) + def new_from_utf8(self, utf8s): + # XXX: kill me! + assert isinstance(utf8s, str) + length, flag = rutf8.check_utf8(utf8s, True) + return W_UnicodeObject(utf8s, length, flag) + def newfilename(self, s): assert isinstance(s, str) # on pypy3, this decodes the byte string return W_BytesObject(s) # with the filesystem encoding From pypy.commits at gmail.com Tue Nov 28 14:29:42 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 11:29:42 -0800 (PST) Subject: [pypy-commit] pypy py3.5: shut up test_whatsnew Message-ID: <5a1db926.8bc4df0a.7daed.2181@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93204:49f614a4e075 Date: 2017-11-28 19:27 +0000 http://bitbucket.org/pypy/pypy/changeset/49f614a4e075/ Log: shut up test_whatsnew diff --git a/pypy/doc/whatsnew-pypy3-head.rst b/pypy/doc/whatsnew-pypy3-head.rst --- a/pypy/doc/whatsnew-pypy3-head.rst +++ b/pypy/doc/whatsnew-pypy3-head.rst @@ -14,3 +14,5 @@ .. branch: py3.5-mac-embedding Download and patch dependencies when building cffi-based stdlib modules + +.. 
branch: os_lockf From pypy.commits at gmail.com Tue Nov 28 17:13:21 2017 From: pypy.commits at gmail.com (rlamy) Date: Tue, 28 Nov 2017 14:13:21 -0800 (PST) Subject: [pypy-commit] pypy default: Improve test_textio so that it detects the current issues Message-ID: <5a1ddf81.83b91c0a.9bd6f.5b8c@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93205:47f75e26f6cf Date: 2017-11-28 22:11 +0000 http://bitbucket.org/pypy/pypy/changeset/47f75e26f6cf/ Log: Improve test_textio so that it detects the current issues diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) From pypy.commits at gmail.com Wed Nov 29 03:10:49 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:10:49 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: maybe fix? Message-ID: <5a1e6b89.54d91c0a.1b0ff.5d39@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93206:7f64b9246539 Date: 2017-11-29 10:13 +0200 http://bitbucket.org/pypy/pypy/changeset/7f64b9246539/ Log: maybe fix? 
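The fix being attempted here concerns which vcvars/vsvars batch file gets run and how the Visual Studio version is guessed from the MSC compiler version recorded in sys.version. A small worked example of that mapping (the helper name is made up; the formula is the one that appears in the patch below):

    # Illustration only: derive the VS%sCOMNTOOLS version number from the
    # "MSC v.NNNN" marker in sys.version.
    import sys

    def guess_vsver():
        msc_pos = sys.version.find('MSC v.')
        if msc_pos == -1:
            return None
        msc_ver = int(sys.version[msc_pos + 6:msc_pos + 10])
        # e.g. MSC v.1500 (VS 2008): (1500 / 10) - 60 == 90
        return (msc_ver / 10) - 60

Note that for MSC v.1900 (VS 2015) this formula yields 130 rather than 140, so such a build is matched by the explicit 140 entry in the hard-coded fallback list rather than by the computed value. The patch itself follows: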
diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -65,12 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vsvars32.bat') if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # wich names both - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -92,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 - env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: + env, errstr = _get_msvc_env(vsver, x64flag) if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:22:37 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:22:37 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: typo Message-ID: <5a1e6e4d.90a9df0a.6473d.8b41@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93207:5d23987c65b3 Date: 2017-11-29 10:25 +0200 http://bitbucket.org/pypy/pypy/changeset/5d23987c65b3/ Log: typo diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -106,7 +106,7 @@ vcvers.insert(0, vsver) errs = [] for vsver in vcvers: - env, errstr = _get_msvc_env(vsver, x64flag) + env = _get_msvc_env(vsver, x64flag) if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:42:47 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:42:47 -0800 (PST) Subject: [pypy-commit] pypy default: merge win32-vcvars, log more and try vsvars32 before vcvars32, go figure Message-ID: <5a1e7307.47b0df0a.7a070.2da6@mx.google.com> Author: Matti Picus Branch: Changeset: r93209:d1aaa6aca19d Date: 2017-11-29 10:41 +0200 http://bitbucket.org/pypy/pypy/changeset/d1aaa6aca19d/ Log: merge win32-vcvars, log more and try vsvars32 before vcvars32, go figure diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -65,12 +65,12 @@ vcbindir = os.path.join(vcinstalldir, 'BIN') vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: - vcvars = os.path.join(toolsdir, 'vcvars32.bat') + vcvars = os.path.join(toolsdir, 'vsvars32.bat') if not os.path.exists(vcvars): # even msdn does not know which to run # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx # wich names both - vcvars = os.path.join(toolsdir, 'vsvars32.bat') + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -92,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Wed Nov 29 03:42:45 2017 From: pypy.commits at gmail.com (mattip) Date: Wed, 29 Nov 2017 00:42:45 -0800 (PST) Subject: [pypy-commit] pypy win32-vcvars: both build slaves seem happy Message-ID: <5a1e7305.8b951c0a.2f62d.7b36@mx.google.com> Author: Matti Picus Branch: win32-vcvars Changeset: r93208:b6119a8ea747 Date: 2017-11-29 10:40 +0200 http://bitbucket.org/pypy/pypy/changeset/b6119a8ea747/ Log: both build slaves seem happy diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. 
branch: win32-vcvars + From pypy.commits at gmail.com Wed Nov 29 09:34:19 2017 From: pypy.commits at gmail.com (cfbolz) Date: Wed, 29 Nov 2017 06:34:19 -0800 (PST) Subject: [pypy-commit] pypy default: - hypothesis tests for the IntBound methods Message-ID: <5a1ec56b.42b2df0a.cc414.6229@mx.google.com> Author: Carl Friedrich Bolz-Tereick Branch: Changeset: r93210:a805f563cfd0 Date: 2017-11-29 15:33 +0100 http://bitbucket.org/pypy/pypy/changeset/a805f563cfd0/ Log: - hypothesis tests for the IntBound methods - lighgly refactor optimization of some int_ ops to make hypothesis testing possible diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) 
self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,31 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +334,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +349,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds 
import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, 
t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) From pypy.commits at gmail.com Wed Nov 29 10:20:26 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:26 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: add a test which checks that vmprof is actually enabled inside greenlets; it fails on default and passes on this branch Message-ID: <5a1ed03a.47b0df0a.7a070.d0d1@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93213:9ac249e058e3 Date: 2017-11-29 16:17 +0100 http://bitbucket.org/pypy/pypy/changeset/9ac249e058e3/ Log: add a test which checks that vmprof is actually enabled inside greenlets; it fails on default and passes on this branch diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt --- a/extra_tests/requirements.txt +++ b/extra_tests/requirements.txt @@ -1,2 +1,3 @@ pytest hypothesis +vmprof diff --git a/extra_tests/test_vmprof_greenlet.py b/extra_tests/test_vmprof_greenlet.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_vmprof_greenlet.py @@ -0,0 +1,28 @@ +import time +import pytest +import greenlet +import vmprof + +def count_samples(filename): + stats = vmprof.read_profile(filename) + return len(stats.profiles) + +def cpuburn(duration): + end = time.time() + duration + while time.time() < end: + pass + +def test_sampling_inside_callback(tmpdir): + # see also test_sampling_inside_callback inside + # pypy/module/_continuation/test/test_stacklet.py + # + G = greenlet.greenlet(cpuburn) + fname = tmpdir.join('log.vmprof') + with fname.open('w+b') as f: + vmprof.enable(f.fileno(), 1/250.0) + G.switch(0.1) + vmprof.disable() + + samples = count_samples(str(fname)) + # 0.1 seconds at 250Hz should be 25 samples + assert 23 < samples < 27 diff --git 
a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -799,6 +799,9 @@ def test_sampling_inside_callback(self): if self.runappdirect: + # see also + # extra_tests.test_vmprof_greenlet.test_sampling_inside_callback + # for a "translated" version of this test skip("we can't run this until we have _vmprof.is_sampling_enabled") from _continuation import continulet # From pypy.commits at gmail.com Wed Nov 29 10:20:22 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:22 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: skip this when run with -A Message-ID: <5a1ed036.9085df0a.fe17c.b828@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93211:37b5a39510e7 Date: 2017-11-29 11:25 +0100 http://bitbucket.org/pypy/pypy/changeset/37b5a39510e7/ Log: skip this when run with -A diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -798,6 +798,8 @@ raises(error, continulet.switch, c1, to=c2) def test_sampling_inside_callback(self): + if self.runappdirect: + skip("we can't run this until we have _vmprof.is_sampling_enabled") from _continuation import continulet # def my_callback(c1): From pypy.commits at gmail.com Wed Nov 29 10:20:28 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:28 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: document this branch Message-ID: <5a1ed03c.c7a4df0a.6e59d.622d@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93214:f7fa5a2b8f51 Date: 2017-11-29 16:19 +0100 http://bitbucket.org/pypy/pypy/changeset/f7fa5a2b8f51/ Log: document this branch diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -25,4 +25,6 @@ Upgrade the _vmprof backend to vmprof 0.4.10 .. branch: fix-vmprof-stacklet-switch -Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) +.. branch: fix-vmprof-stacklet-switch-2 +Fix vmprof+ continulet (i.e. greenelts, eventlet, gevent, ...) + From pypy.commits at gmail.com Wed Nov 29 10:20:24 2017 From: pypy.commits at gmail.com (antocuni) Date: Wed, 29 Nov 2017 07:20:24 -0800 (PST) Subject: [pypy-commit] pypy fix-vmprof-stacklet-switch-2: simplify Message-ID: <5a1ed038.0abadf0a.a9a6c.752d@mx.google.com> Author: Antonio Cuni Branch: fix-vmprof-stacklet-switch-2 Changeset: r93212:4776dc8a84f7 Date: 2017-11-29 16:16 +0100 http://bitbucket.org/pypy/pypy/changeset/4776dc8a84f7/ Log: simplify diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -1,4 +1,5 @@ import py +import pytest try: import _continuation except ImportError: @@ -101,11 +102,7 @@ particular, we need to ensure that vmprof does not sample the stack in the middle of a switch, else we read nonsense. 
""" - try: - import _vmprof - except ImportError: - py.test.skip("no _vmprof") - # + _vmprof = pytest.importorskip('_vmprof') def switch_forever(c): while True: c.switch() From pypy.commits at gmail.com Wed Nov 29 13:34:12 2017 From: pypy.commits at gmail.com (rlamy) Date: Wed, 29 Nov 2017 10:34:12 -0800 (PST) Subject: [pypy-commit] pypy default: Improve interp-level test and fix bugs in W_TextIOWrapper.readline_w() Message-ID: <5a1efda4.0e97df0a.8f776.4026@mx.google.com> Author: Ronan Lamy Branch: Changeset: r93215:b2e4b128808e Date: 2017-11-29 18:32 +0000 http://bitbucket.org/pypy/pypy/changeset/b2e4b128808e/ Log: Improve interp-level test and fix bugs in W_TextIOWrapper.readline_w() diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -353,6 +353,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -737,7 +738,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,53 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): From pypy.commits at gmail.com Wed Nov 29 13:36:01 2017 From: 
pypy.commits at gmail.com (rlamy) Date: Wed, 29 Nov 2017 10:36:01 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a1efe11.c23a1c0a.8ea05.4bab@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93216:fcf5d3fb56f4 Date: 2017-11-29 18:34 +0000 http://bitbucket.org/pypy/pypy/changeset/fcf5d3fb56f4/ Log: hg merge default diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py --- a/extra_tests/test_textio.py +++ b/extra_tests/test_textio.py @@ -1,28 +1,48 @@ from hypothesis import given, strategies as st from io import BytesIO, TextIOWrapper +import os -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(txt, mode, limit): + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data textio = TextIOWrapper( - BytesIO(txt.encode('utf-8')), encoding='utf-8', newline=mode) + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) lines = [] - while True: + for limit in limits: line = textio.readline(limit) - if limit > 0: - assert len(line) < limit + if limit >= 0: + assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -26,3 +26,6 @@ .. branch: fix-vmprof-stacklet-switch Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. 
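A minimal sketch of that lookup, for illustration only (it is not taken from the diff above; it assumes CPython 2.7 on Windows, where the stock ``distutils`` consults the registry and the ``VS90COMNTOOLS`` variable, and a current ``setuptools`` extends the same search to the per-user install location)::

    # Ask distutils where the MSVC 9.0 environment script lives.
    # find_vcvarsall() returns the path to vcvarsall.bat, or None if no
    # suitable Visual C++ 9.0 installation could be located.
    from distutils.msvc9compiler import find_vcvarsall

    vcvarsall = find_vcvarsall(9.0)
    if vcvarsall is None:
        print "MSVC 9.0 not found: install the compiler or set VS90COMNTOOLS"
    else:
        print "found vcvarsall.bat at", vcvarsall

If this prints a path, ``distutils``-driven builds (and hence the cffi module builds discussed below) should be able to pick up the compiler without further configuration.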
For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py --- a/pypy/module/_io/interp_textio.py +++ b/pypy/module/_io/interp_textio.py @@ -368,6 +368,7 @@ while scanned < limit: try: ch = self.next_char() + scanned += 1 except StopIteration: return False if ch == u'\n': @@ -780,7 +781,7 @@ remnant = None continue - if limit > 0: + if limit >= 0: remaining = limit - builder.getlength() assert remaining >= 0 else: diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py --- a/pypy/module/_io/test/test_interp_textio.py +++ b/pypy/module/_io/test/test_interp_textio.py @@ -1,40 +1,53 @@ import pytest try: - from hypothesis import given, strategies as st, assume + from hypothesis import given, strategies as st except ImportError: pytest.skip("hypothesis required") +import os from pypy.module._io.interp_bytesio import W_BytesIO from pypy.module._io.interp_textio import W_TextIOWrapper, DecodeBuffer -LINESEP = ['', '\r', '\n', '\r\n'] +def translate_newlines(text): + text = text.replace(u'\r\n', u'\n') + text = text.replace(u'\r', u'\n') + return text.replace(u'\n', os.linesep) @st.composite -def text_with_newlines(draw): - sep = draw(st.sampled_from(LINESEP)) - lines = draw(st.lists(st.text(max_size=10), max_size=10)) - return sep.join(lines) +def st_readline(draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + fragments = [] + limits = [] + for _ in range(n_lines): + line = draw(st.text(st.characters(blacklist_characters=u'\r\n'))) + fragments.append(line) + ending = draw(st.sampled_from([u'\n', u'\r', 
u'\r\n'])) + fragments.append(ending) + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + return (u''.join(fragments), limits) - at given(txt=text_with_newlines(), - mode=st.sampled_from(['\r', '\n', '\r\n', '']), - limit=st.integers(min_value=-1)) -def test_readline(space, txt, mode, limit): - assume(limit != 0) + at given(data=st_readline(), + mode=st.sampled_from(['\r', '\n', '\r\n', ''])) +def test_readline(space, data, mode): + txt, limits = data w_stream = W_BytesIO(space) w_stream.descr_init(space, space.newbytes(txt.encode('utf-8'))) w_textio = W_TextIOWrapper(space) w_textio.descr_init( - space, w_stream, encoding='utf-8', + space, w_stream, + encoding='utf-8', w_errors=space.newtext('surrogatepass'), w_newline=space.newtext(mode)) lines = [] - while True: + for limit in limits: line = space.unicode_w(w_textio.readline_w(space, space.newint(limit))) - if limit > 0: + if limit >= 0: assert len(line) <= limit if line: lines.append(line) - else: + elif limit: break - assert u''.join(lines) == txt + assert txt.startswith(u''.join(lines)) @given(st.text()) def test_read_buffer(text): diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - 
r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,31 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +334,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +349,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if 
self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def 
test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) diff --git a/rpython/translator/platform/test/test_platform.py b/rpython/translator/platform/test/test_platform.py --- a/rpython/translator/platform/test/test_platform.py +++ b/rpython/translator/platform/test/test_platform.py @@ -113,8 +113,10 @@ def test_environment_inheritance(self): # make sure that environment is inherited cmd = 'import os; print os.environ["_SOME_VARIABLE_%d"]' + env = {'_SOME_VARIABLE_1':'xyz'} + env['PATH'] = os.environ['PATH'] res = self.platform.execute(sys.executable, ['-c', cmd % 1], - env={'_SOME_VARIABLE_1':'xyz'}) + env=env) assert 'xyz' in res.out os.environ['_SOME_VARIABLE_2'] = 'zyz' try: diff --git a/rpython/translator/platform/windows.py b/rpython/translator/platform/windows.py --- a/rpython/translator/platform/windows.py +++ b/rpython/translator/platform/windows.py @@ -10,21 +10,13 @@ rpydir = str(py.path.local(rpython.__file__).join('..')) def _get_compiler_type(cc, x64_flag): - import subprocess if not cc: cc = os.environ.get('CC','') if not cc: return MsvcPlatform(x64=x64_flag) elif cc.startswith('mingw') or cc == 'gcc': return MingwPlatform(cc) - else: - return MsvcPlatform(cc=cc, x64=x64_flag) - try: - subprocess.check_output([cc, 
'--version']) - except: - raise ValueError("Could not find compiler specified by cc option '%s'," - " it must be a valid exe file on your path" % cc) - return MingwPlatform(cc) + return MsvcPlatform(cc=cc, x64=x64_flag) def Windows(cc=None): return _get_compiler_type(cc, False) @@ -74,6 +66,11 @@ vcvars = os.path.join(vcbindir, 'amd64', 'vcvarsamd64.bat') else: vcvars = os.path.join(toolsdir, 'vsvars32.bat') + if not os.path.exists(vcvars): + # even msdn does not know which to run + # see https://msdn.microsoft.com/en-us/library/1700bbwd(v=vs.90).aspx + # wich names both + vcvars = os.path.join(toolsdir, 'vcvars32.bat') import subprocess try: @@ -95,25 +92,21 @@ key, value = line.split('=', 1) if key.upper() in ['PATH', 'INCLUDE', 'LIB']: env[key.upper()] = value - ## log.msg("Updated environment with %s" % (vcvars,)) + log.msg("Updated environment with %s" % (vcvars,)) return env def find_msvc_env(x64flag=False): + vcvers = [140, 100, 90, 80, 71, 70] # First, try to get the compiler which served to compile python msc_pos = sys.version.find('MSC v.') if msc_pos != -1: msc_ver = int(sys.version[msc_pos+6:msc_pos+10]) - # 1300 -> 70, 1310 -> 71, 1400 -> 80, 1500 -> 90 + # 1500 -> 90, 1900 -> 140 vsver = (msc_ver / 10) - 60 + vcvers.insert(0, vsver) + errs = [] + for vsver in vcvers: env = _get_msvc_env(vsver, x64flag) - - if env is not None: - return env - - # Then, try any other version - for vsver in (100, 90, 80, 71, 70): # All the versions I know - env = _get_msvc_env(vsver, x64flag) - if env is not None: return env log.error("Could not find a Microsoft Compiler") From pypy.commits at gmail.com Thu Nov 30 11:34:02 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:34:02 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: A branch to try to use mmap() instead of malloc() for arenas from the GC Message-ID: <5a2032fa.c7a4df0a.6e59d.4e94@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93217:75f5a5c594a8 Date: 2017-11-30 16:54 +0100 http://bitbucket.org/pypy/pypy/changeset/75f5a5c594a8/ Log: A branch to try to use mmap() instead of malloc() for arenas from the GC From pypy.commits at gmail.com Thu Nov 30 11:34:05 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:34:05 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: (fijal, arigo) Message-ID: <5a2032fd.8b951c0a.2f62d.aa35@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93218:6412ce4e1198 Date: 2017-11-30 17:30 +0100 http://bitbucket.org/pypy/pypy/changeset/6412ce4e1198/ Log: (fijal, arigo) Trying to use mmap() to allocate arenas diff --git a/rpython/memory/gc/minimarkpage.py b/rpython/memory/gc/minimarkpage.py --- a/rpython/memory/gc/minimarkpage.py +++ b/rpython/memory/gc/minimarkpage.py @@ -292,7 +292,7 @@ # # 'arena_base' points to the start of malloced memory; it might not # be a page-aligned address - arena_base = llarena.arena_malloc(self.arena_size, False) + arena_base = llarena.arena_mmap(self.arena_size) if not arena_base: out_of_memory("out of memory: couldn't allocate the next arena") arena_end = arena_base + self.arena_size @@ -395,8 +395,7 @@ if arena.nfreepages == arena.totalpages: # # The whole arena is empty. Free it. 
- llarena.arena_reset(arena.base, self.arena_size, 4) - llarena.arena_free(arena.base) + llarena.arena_munmap(arena.base, self.arena_size) lltype.free(arena, flavor='raw', track_allocation=False) # else: diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -327,6 +327,16 @@ assert not arena_addr.arena.objectptrs arena_addr.arena.mark_freed() +def arena_mmap(nbytes): + """Allocate and return a new arena, zero-initialized by the + system, calling mmap().""" + return arena_malloc(nbytes, True) + +def arena_munmap(arena_addr): + """Release an arena allocated with arena_mmap().""" + arena_free(arena_addr) + + def arena_reset(arena_addr, size, zero): """Free all objects in the arena, which can then be reused. This can also be used on a subrange of the arena. @@ -530,6 +540,30 @@ llfakeimpl=arena_free, sandboxsafe=True) +def llimpl_arena_mmap(nbytes): + from rpython.rlib import rmmap + flags = rmmap.MAP_PRIVATE | rmmap.MAP_ANONYMOUS + prot = rmmap.PROT_READ | rmmap.PROT_WRITE + p = rffi.cast(llmemory.Address, rmmap.c_mmap_safe( + lltype.nullptr(rmmap.PTR.TO), nbytes, prot, flags, -1, 0)) + if p == rffi.cast(llmemory.Address, -1): + p = rffi.cast(llmemory.Address, 0) + return p +register_external(arena_mmap, [int], llmemory.Address, + 'll_arena.arena_mmap', + llimpl=llimpl_arena_mmap, + llfakeimpl=arena_mmap, + sandboxsafe=True) + +def llimpl_arena_munmap(arena_addr, nbytes): + from rpython.rlib import rmmap + rmmap.c_munmap_safe(rffi.cast(rmmap.PTR, arena_addr), nbytes) +register_external(arena_munmap, [llmemory.Address, int], None, + 'll_arena.arena_munmap', + llimpl=llimpl_arena_munmap, + llfakeimpl=arena_munmap, + sandboxsafe=True) + def llimpl_arena_reset(arena_addr, size, zero): if zero: if zero == 1: From pypy.commits at gmail.com Thu Nov 30 11:39:34 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:39:34 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: test fix Message-ID: <5a203446.6b88df0a.a6cc5.4856@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93219:4c75975c98db Date: 2017-11-30 17:37 +0100 http://bitbucket.org/pypy/pypy/changeset/4c75975c98db/ Log: test fix diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -332,9 +332,10 @@ system, calling mmap().""" return arena_malloc(nbytes, True) -def arena_munmap(arena_addr): +def arena_munmap(arena_addr, nbytes): """Release an arena allocated with arena_mmap().""" arena_free(arena_addr) + assert nbytes == arena_addr.arena.nbytes def arena_reset(arena_addr, size, zero): From pypy.commits at gmail.com Thu Nov 30 11:45:03 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:03 -0800 (PST) Subject: [pypy-commit] pypy default: backout changes that broke translation in unclear ways (thanks RPython) Message-ID: <5a20358f.06b6df0a.a3bf2.b09f@mx.google.com> Author: fijal Branch: Changeset: r93220:30c6fda0a499 Date: 2017-11-30 18:38 +0200 http://bitbucket.org/pypy/pypy/changeset/30c6fda0a499/ Log: backout changes that broke translation in unclear ways (thanks RPython) diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,6 +25,19 @@ return (1 << 
((byte_size << 3) - 1)) - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -37,7 +50,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the result + # FIXME: This takes care of the instruction where box is the reuslt # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -78,8 +91,14 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - b = b1.or_bound(b2) - self.getintbound(op).intersect(b) + if b1.known_ge(IntBound(0, 0)) and \ + b2.known_ge(IntBound(0, 0)): + r = self.getintbound(op) + if b1.has_upper and b2.has_upper: + mostsignificant = b1.upper | b2.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -93,8 +112,15 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - b = b1.and_bound(b2) - self.getintbound(op).intersect(b) + r = self.getintbound(op) + pos1 = b1.known_ge(IntBound(0, 0)) + pos2 = b2.known_ge(IntBound(0, 0)) + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(b1) + if pos2: + r.make_le(b2) def optimize_INT_SUB(self, op): return self.emit(op) @@ -185,10 +211,16 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): - b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - r = self.getintbound(op) - r.intersect(b1.mod_bound(b2)) + if b2.is_constant(): + val = b2.getint() + r = self.getintbound(op) + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -404,7 +436,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_nonnegative(): + if b.known_ge(IntBound(0, 0)): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -615,7 +647,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_nonnegative(): + if b1.known_ge(IntBound(0, 0)): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,19 +12,6 @@ MAXINT = maxint MININT = -maxint - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -105,9 +92,6 @@ def known_ge(self, other): return other.known_le(self) - def known_nonnegative(self): - return self.has_lower and 0 <= self.lower - def intersect(self, other): r = False @@ -208,22 +192,10 
@@ else: return IntUnbounded() - def mod_bound(self, other): - r = IntUnbounded() - if other.is_constant(): - val = other.getint() - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) - return r - def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -239,7 +211,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -249,31 +221,7 @@ else: return IntUnbounded() - def and_bound(self, other): - pos1 = self.known_nonnegative() - pos2 = other.known_nonnegative() - r = IntUnbounded() - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(self) - if pos2: - r.make_le(other) - return r - - def or_bound(self, other): - r = IntUnbounded() - if self.known_nonnegative() and \ - other.known_nonnegative(): - if self.has_upper and other.has_upper: - mostsignificant = self.upper | other.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) - return r - def contains(self, val): - assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -334,7 +282,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_nonnegative() and + return (self.bounded() and self.known_ge(ConstIntBound(0)) and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -349,7 +297,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_nonnegative() and \ + if self.known_ge(IntBound(0, 0)) and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,34 +1,12 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded, next_pow2_m1 + IntLowerBound, IntUnbounded +from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck +from rpython.rlib.rarithmetic import LONG_BIT -from hypothesis import given, strategies - -special_values = ( - range(-100, 100) + - [2 ** i for i in range(1, LONG_BIT)] + - [-2 ** i for i in range(1, LONG_BIT)] + - [2 ** i - 1 for i in range(1, LONG_BIT)] + - [-2 ** i - 1 for i in range(1, LONG_BIT)] + - [2 ** i + 1 for i in range(1, LONG_BIT)] + - [-2 ** i + 1 for i in range(1, LONG_BIT)] + - [sys.maxint, -sys.maxint-1]) - -special_values = strategies.sampled_from( - [int(v) for v in special_values if type(int(v)) is int]) - -ints = strategies.builds( - int, # strategies.integers sometimes returns a long? 
- special_values | strategies.integers( - min_value=int(-sys.maxint-1), max_value=sys.maxint)) - -ints_or_none = strategies.none() | ints - - -def bound(a, b): +def bound(a,b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -36,55 +14,11 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a, b) + return IntBound(a,b) def const(a): return bound(a,a) - -def build_bound_with_contained_number(a, b, c): - a, b, c = sorted([a, b, c]) - r = bound(a, c) - assert r.contains(b) - return r, b - -bound_with_contained_number = strategies.builds( - build_bound_with_contained_number, - ints_or_none, - ints_or_none, - ints -) - -unbounded = strategies.builds( - lambda x: (bound(None, None), int(x)), - ints -) - -lower_bounded = strategies.builds( - lambda x, y: (bound(min(x, y), None), max(x, y)), - ints, - ints -) - -upper_bounded = strategies.builds( - lambda x, y: (bound(None, max(x, y)), min(x, y)), - ints, - ints -) - -bounded = strategies.builds( - build_bound_with_contained_number, - ints, ints, ints -) - -constant = strategies.builds( - lambda x: (const(x), x), - ints -) - -bound_with_contained_number = strategies.one_of( - unbounded, lower_bounded, upper_bounded, constant, bounded) - def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -306,6 +240,8 @@ def test_div_bound(): + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -325,15 +261,6 @@ assert a.contains(-3) assert a.contains(0) -def test_mod_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.mod_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -348,25 +275,6 @@ assert not a.contains(-1) assert not a.contains(4) -def test_and_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.and_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 & n2) - -def test_or_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.or_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 | n2) - assert b3.contains(n1 ^ n2) # we use it for xor too - def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -377,82 +285,3 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 - - - at given(bound_with_contained_number, bound_with_contained_number) -def test_add_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.add_bound(b2) - try: - r = ovfcheck(n1 + n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_sub_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.sub_bound(b2) - try: - r = ovfcheck(n1 - n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mul_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mul_bound(b2) - try: - r = ovfcheck(n1 * n2) - except OverflowError: - assert not b3.bounded() - else: - assert 
b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_div_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.py_div_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 / n2) # Python-style div - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mod_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mod_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style mod - - at given(bound_with_contained_number, bound_with_contained_number) -def test_and_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.and_bound(b2) - r = n1 & n2 - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_or_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.or_bound(b2) - r = n1 | n2 - assert b3.contains(r) - r = n1 ^ n2 - assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 11:45:05 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:05 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: merge default Message-ID: <5a203591.13811c0a.a506f.122f@mx.google.com> Author: fijal Branch: mmap-for-arenas Changeset: r93221:6fb9f1a724da Date: 2017-11-30 18:38 +0200 http://bitbucket.org/pypy/pypy/changeset/6fb9f1a724da/ Log: merge default diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,6 +25,19 @@ return (1 << ((byte_size << 3) - 1)) - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -37,7 +50,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the result + # FIXME: This takes care of the instruction where box is the reuslt # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -78,8 +91,14 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - b = b1.or_bound(b2) - self.getintbound(op).intersect(b) + if b1.known_ge(IntBound(0, 0)) and \ + b2.known_ge(IntBound(0, 0)): + r = self.getintbound(op) + if b1.has_upper and b2.has_upper: + mostsignificant = b1.upper | b2.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -93,8 +112,15 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - b = b1.and_bound(b2) - self.getintbound(op).intersect(b) + r = self.getintbound(op) + pos1 = b1.known_ge(IntBound(0, 0)) + pos2 = b2.known_ge(IntBound(0, 0)) + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(b1) + if pos2: + r.make_le(b2) def optimize_INT_SUB(self, op): return self.emit(op) @@ -185,10 +211,16 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): - b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - r = 
self.getintbound(op) - r.intersect(b1.mod_bound(b2)) + if b2.is_constant(): + val = b2.getint() + r = self.getintbound(op) + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -404,7 +436,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_nonnegative(): + if b.known_ge(IntBound(0, 0)): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -615,7 +647,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_nonnegative(): + if b1.known_ge(IntBound(0, 0)): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,19 +12,6 @@ MAXINT = maxint MININT = -maxint - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -105,9 +92,6 @@ def known_ge(self, other): return other.known_le(self) - def known_nonnegative(self): - return self.has_lower and 0 <= self.lower - def intersect(self, other): r = False @@ -208,22 +192,10 @@ else: return IntUnbounded() - def mod_bound(self, other): - r = IntUnbounded() - if other.is_constant(): - val = other.getint() - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) - return r - def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -239,7 +211,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_nonnegative() and \ + other.known_ge(IntBound(0, 0)) and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -249,31 +221,7 @@ else: return IntUnbounded() - def and_bound(self, other): - pos1 = self.known_nonnegative() - pos2 = other.known_nonnegative() - r = IntUnbounded() - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(self) - if pos2: - r.make_le(other) - return r - - def or_bound(self, other): - r = IntUnbounded() - if self.known_nonnegative() and \ - other.known_nonnegative(): - if self.has_upper and other.has_upper: - mostsignificant = self.upper | other.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) - return r - def contains(self, val): - assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -334,7 +282,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and 
self.known_nonnegative() and + return (self.bounded() and self.known_ge(ConstIntBound(0)) and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -349,7 +297,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_nonnegative() and \ + if self.known_ge(IntBound(0, 0)) and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,34 +1,12 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded, next_pow2_m1 + IntLowerBound, IntUnbounded +from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 from copy import copy import sys -from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck +from rpython.rlib.rarithmetic import LONG_BIT -from hypothesis import given, strategies - -special_values = ( - range(-100, 100) + - [2 ** i for i in range(1, LONG_BIT)] + - [-2 ** i for i in range(1, LONG_BIT)] + - [2 ** i - 1 for i in range(1, LONG_BIT)] + - [-2 ** i - 1 for i in range(1, LONG_BIT)] + - [2 ** i + 1 for i in range(1, LONG_BIT)] + - [-2 ** i + 1 for i in range(1, LONG_BIT)] + - [sys.maxint, -sys.maxint-1]) - -special_values = strategies.sampled_from( - [int(v) for v in special_values if type(int(v)) is int]) - -ints = strategies.builds( - int, # strategies.integers sometimes returns a long? - special_values | strategies.integers( - min_value=int(-sys.maxint-1), max_value=sys.maxint)) - -ints_or_none = strategies.none() | ints - - -def bound(a, b): +def bound(a,b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -36,55 +14,11 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a, b) + return IntBound(a,b) def const(a): return bound(a,a) - -def build_bound_with_contained_number(a, b, c): - a, b, c = sorted([a, b, c]) - r = bound(a, c) - assert r.contains(b) - return r, b - -bound_with_contained_number = strategies.builds( - build_bound_with_contained_number, - ints_or_none, - ints_or_none, - ints -) - -unbounded = strategies.builds( - lambda x: (bound(None, None), int(x)), - ints -) - -lower_bounded = strategies.builds( - lambda x, y: (bound(min(x, y), None), max(x, y)), - ints, - ints -) - -upper_bounded = strategies.builds( - lambda x, y: (bound(None, max(x, y)), min(x, y)), - ints, - ints -) - -bounded = strategies.builds( - build_bound_with_contained_number, - ints, ints, ints -) - -constant = strategies.builds( - lambda x: (const(x), x), - ints -) - -bound_with_contained_number = strategies.one_of( - unbounded, lower_bounded, upper_bounded, constant, bounded) - def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -306,6 +240,8 @@ def test_div_bound(): + from rpython.rtyper.lltypesystem import lltype + from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -325,15 +261,6 @@ assert a.contains(-3) assert a.contains(0) -def test_mod_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.mod_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -348,25 +275,6 @@ assert not 
a.contains(-1) assert not a.contains(4) -def test_and_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.and_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 & n2) - -def test_or_bound(): - for _, _, b1 in some_bounds(): - for _, _, b2 in some_bounds(): - b3 = b1.or_bound(b2) - for n1 in nbr: - for n2 in nbr: - if b1.contains(n1) and b2.contains(n2): - assert b3.contains(n1 | n2) - assert b3.contains(n1 ^ n2) # we use it for xor too - def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -377,82 +285,3 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 - - - at given(bound_with_contained_number, bound_with_contained_number) -def test_add_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.add_bound(b2) - try: - r = ovfcheck(n1 + n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_sub_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - print b1, n1 - print b2, n2 - b3 = b1.sub_bound(b2) - try: - r = ovfcheck(n1 - n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mul_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mul_bound(b2) - try: - r = ovfcheck(n1 * n2) - except OverflowError: - assert not b3.bounded() - else: - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_div_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.py_div_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 / n2) # Python-style div - - at given(bound_with_contained_number, bound_with_contained_number) -def test_mod_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.mod_bound(b2) - if n1 == -sys.maxint-1 and n2 == -1: - return # overflow - if n2 != 0: - assert b3.contains(n1 % n2) # Python-style mod - - at given(bound_with_contained_number, bound_with_contained_number) -def test_and_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.and_bound(b2) - r = n1 & n2 - assert b3.contains(r) - - at given(bound_with_contained_number, bound_with_contained_number) -def test_or_bound_random(t1, t2): - b1, n1 = t1 - b2, n2 = t2 - b3 = b1.or_bound(b2) - r = n1 | n2 - assert b3.contains(r) - r = n1 ^ n2 - assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 11:45:07 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 08:45:07 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: merge Message-ID: <5a203593.8283df0a.d8245.0f49@mx.google.com> Author: fijal Branch: mmap-for-arenas Changeset: r93222:2e594f3e5237 Date: 2017-11-30 18:44 +0200 http://bitbucket.org/pypy/pypy/changeset/2e594f3e5237/ Log: merge diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -332,9 +332,10 @@ system, calling mmap().""" return arena_malloc(nbytes, True) -def arena_munmap(arena_addr): +def arena_munmap(arena_addr, nbytes): """Release an arena allocated with arena_mmap().""" arena_free(arena_addr) + assert nbytes == arena_addr.arena.nbytes def arena_reset(arena_addr, size, zero): From pypy.commits at gmail.com 
Thu Nov 30 11:49:31 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 08:49:31 -0800 (PST) Subject: [pypy-commit] pypy default: Re-revert 30c6fda0a499, and add the proper fix, hopefully Message-ID: <5a20369b.b198df0a.958c3.16c4@mx.google.com> Author: Armin Rigo Branch: Changeset: r93223:e6c7a428f649 Date: 2017-11-30 17:48 +0100 http://bitbucket.org/pypy/pypy/changeset/e6c7a428f649/ Log: Re-revert 30c6fda0a499, and add the proper fix, hopefully diff --git a/rpython/jit/metainterp/optimizeopt/intbounds.py b/rpython/jit/metainterp/optimizeopt/intbounds.py --- a/rpython/jit/metainterp/optimizeopt/intbounds.py +++ b/rpython/jit/metainterp/optimizeopt/intbounds.py @@ -25,19 +25,6 @@ return (1 << ((byte_size << 3) - 1)) - 1 -IS_64_BIT = sys.maxint > 2**32 - -def next_pow2_m1(n): - """Calculate next power of 2 greater than n minus one.""" - n |= n >> 1 - n |= n >> 2 - n |= n >> 4 - n |= n >> 8 - n |= n >> 16 - if IS_64_BIT: - n |= n >> 32 - return n - class OptIntBounds(Optimization): """Keeps track of the bounds placed on integers by guards and remove @@ -50,7 +37,7 @@ return dispatch_postprocess(self, op) def propagate_bounds_backward(self, box): - # FIXME: This takes care of the instruction where box is the reuslt + # FIXME: This takes care of the instruction where box is the result # but the bounds produced by all instructions where box is # an argument might also be tighten b = self.getintbound(box) @@ -91,14 +78,8 @@ b1 = self.getintbound(v1) v2 = self.get_box_replacement(op.getarg(1)) b2 = self.getintbound(v2) - if b1.known_ge(IntBound(0, 0)) and \ - b2.known_ge(IntBound(0, 0)): - r = self.getintbound(op) - if b1.has_upper and b2.has_upper: - mostsignificant = b1.upper | b2.upper - r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) - else: - r.make_ge(IntBound(0, 0)) + b = b1.or_bound(b2) + self.getintbound(op).intersect(b) optimize_INT_OR = optimize_INT_OR_or_XOR optimize_INT_XOR = optimize_INT_OR_or_XOR @@ -112,15 +93,8 @@ def postprocess_INT_AND(self, op): b1 = self.getintbound(op.getarg(0)) b2 = self.getintbound(op.getarg(1)) - r = self.getintbound(op) - pos1 = b1.known_ge(IntBound(0, 0)) - pos2 = b2.known_ge(IntBound(0, 0)) - if pos1 or pos2: - r.make_ge(IntBound(0, 0)) - if pos1: - r.make_le(b1) - if pos2: - r.make_le(b2) + b = b1.and_bound(b2) + self.getintbound(op).intersect(b) def optimize_INT_SUB(self, op): return self.emit(op) @@ -211,16 +185,10 @@ r.intersect(b1.py_div_bound(b2)) def post_call_INT_PY_MOD(self, op): + b1 = self.getintbound(op.getarg(1)) b2 = self.getintbound(op.getarg(2)) - if b2.is_constant(): - val = b2.getint() - r = self.getintbound(op) - if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos - r.make_ge(IntBound(0, 0)) - r.make_lt(IntBound(val, val)) - else: # with Python's modulo: neg < (x % neg) <= 0 - r.make_gt(IntBound(val, val)) - r.make_le(IntBound(0, 0)) + r = self.getintbound(op) + r.intersect(b1.mod_bound(b2)) def optimize_INT_LSHIFT(self, op): return self.emit(op) @@ -436,7 +404,7 @@ def optimize_INT_FORCE_GE_ZERO(self, op): b = self.getintbound(op.getarg(0)) - if b.known_ge(IntBound(0, 0)): + if b.known_nonnegative(): self.make_equal_to(op, op.getarg(0)) else: return self.emit(op) @@ -647,7 +615,7 @@ if r.is_constant(): if r.getint() == valnonzero: b1 = self.getintbound(op.getarg(0)) - if b1.known_ge(IntBound(0, 0)): + if b1.known_nonnegative(): b1.make_gt(IntBound(0, 0)) self.propagate_bounds_backward(op.getarg(0)) elif r.getint() == valzero: diff --git a/rpython/jit/metainterp/optimizeopt/intutils.py 
b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -12,6 +12,19 @@ MAXINT = maxint MININT = -maxint - 1 +IS_64_BIT = sys.maxint > 2**32 + +def next_pow2_m1(n): + """Calculate next power of 2 greater than n minus one.""" + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + if IS_64_BIT: + n |= n >> 32 + return n + class IntBound(AbstractInfo): _attrs_ = ('has_upper', 'has_lower', 'upper', 'lower') @@ -92,6 +105,9 @@ def known_ge(self, other): return other.known_le(self) + def known_nonnegative(self): + return self.has_lower and 0 <= self.lower + def intersect(self, other): r = False @@ -192,10 +208,22 @@ else: return IntUnbounded() + def mod_bound(self, other): + r = IntUnbounded() + if other.is_constant(): + val = other.getint() + if val >= 0: # with Python's modulo: 0 <= (x % pos) < pos + r.make_ge(IntBound(0, 0)) + r.make_lt(IntBound(val, val)) + else: # with Python's modulo: neg < (x % neg) <= 0 + r.make_gt(IntBound(val, val)) + r.make_le(IntBound(0, 0)) + return r + def lshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): try: vals = (ovfcheck(self.upper << other.upper), @@ -211,7 +239,7 @@ def rshift_bound(self, other): if self.has_upper and self.has_lower and \ other.has_upper and other.has_lower and \ - other.known_ge(IntBound(0, 0)) and \ + other.known_nonnegative() and \ other.known_lt(IntBound(LONG_BIT, LONG_BIT)): vals = (self.upper >> other.upper, self.upper >> other.lower, @@ -221,7 +249,32 @@ else: return IntUnbounded() + def and_bound(self, other): + pos1 = self.known_nonnegative() + pos2 = other.known_nonnegative() + r = IntUnbounded() + if pos1 or pos2: + r.make_ge(IntBound(0, 0)) + if pos1: + r.make_le(self) + if pos2: + r.make_le(other) + return r + + def or_bound(self, other): + r = IntUnbounded() + if self.known_nonnegative() and \ + other.known_nonnegative(): + if self.has_upper and other.has_upper: + mostsignificant = self.upper | other.upper + r.intersect(IntBound(0, next_pow2_m1(mostsignificant))) + else: + r.make_ge(IntBound(0, 0)) + return r + def contains(self, val): + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT): @@ -282,7 +335,7 @@ guards.append(op) def is_bool(self): - return (self.bounded() and self.known_ge(ConstIntBound(0)) and + return (self.bounded() and self.known_nonnegative() and self.known_le(ConstIntBound(1))) def make_bool(self): @@ -297,7 +350,7 @@ if self.known_gt(IntBound(0, 0)) or \ self.known_lt(IntBound(0, 0)): return INFO_NONNULL - if self.known_ge(IntBound(0, 0)) and \ + if self.known_nonnegative() and \ self.known_le(IntBound(0, 0)): return INFO_NULL return INFO_UNKNOWN diff --git a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py --- a/rpython/jit/metainterp/optimizeopt/test/test_intbound.py +++ b/rpython/jit/metainterp/optimizeopt/test/test_intbound.py @@ -1,12 +1,34 @@ from rpython.jit.metainterp.optimizeopt.intutils import IntBound, IntUpperBound, \ - IntLowerBound, IntUnbounded -from rpython.jit.metainterp.optimizeopt.intbounds import next_pow2_m1 + IntLowerBound, IntUnbounded, next_pow2_m1 from copy import copy import sys -from 
rpython.rlib.rarithmetic import LONG_BIT +from rpython.rlib.rarithmetic import LONG_BIT, ovfcheck -def bound(a,b): +from hypothesis import given, strategies + +special_values = ( + range(-100, 100) + + [2 ** i for i in range(1, LONG_BIT)] + + [-2 ** i for i in range(1, LONG_BIT)] + + [2 ** i - 1 for i in range(1, LONG_BIT)] + + [-2 ** i - 1 for i in range(1, LONG_BIT)] + + [2 ** i + 1 for i in range(1, LONG_BIT)] + + [-2 ** i + 1 for i in range(1, LONG_BIT)] + + [sys.maxint, -sys.maxint-1]) + +special_values = strategies.sampled_from( + [int(v) for v in special_values if type(int(v)) is int]) + +ints = strategies.builds( + int, # strategies.integers sometimes returns a long? + special_values | strategies.integers( + min_value=int(-sys.maxint-1), max_value=sys.maxint)) + +ints_or_none = strategies.none() | ints + + +def bound(a, b): if a is None and b is None: return IntUnbounded() elif a is None: @@ -14,11 +36,55 @@ elif b is None: return IntLowerBound(a) else: - return IntBound(a,b) + return IntBound(a, b) def const(a): return bound(a,a) + +def build_bound_with_contained_number(a, b, c): + a, b, c = sorted([a, b, c]) + r = bound(a, c) + assert r.contains(b) + return r, b + +bound_with_contained_number = strategies.builds( + build_bound_with_contained_number, + ints_or_none, + ints_or_none, + ints +) + +unbounded = strategies.builds( + lambda x: (bound(None, None), int(x)), + ints +) + +lower_bounded = strategies.builds( + lambda x, y: (bound(min(x, y), None), max(x, y)), + ints, + ints +) + +upper_bounded = strategies.builds( + lambda x, y: (bound(None, max(x, y)), min(x, y)), + ints, + ints +) + +bounded = strategies.builds( + build_bound_with_contained_number, + ints, ints, ints +) + +constant = strategies.builds( + lambda x: (const(x), x), + ints +) + +bound_with_contained_number = strategies.one_of( + unbounded, lower_bounded, upper_bounded, constant, bounded) + def some_bounds(): brd = [None] + range(-2, 3) for lower in brd: @@ -240,8 +306,6 @@ def test_div_bound(): - from rpython.rtyper.lltypesystem import lltype - from rpython.rtyper.lltypesystem.lloperation import llop for _, _, b1 in some_bounds(): for _, _, b2 in some_bounds(): b3 = b1.py_div_bound(b2) @@ -261,6 +325,15 @@ assert a.contains(-3) assert a.contains(0) +def test_mod_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.mod_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style div def test_sub_bound(): for _, _, b1 in some_bounds(): @@ -275,6 +348,25 @@ assert not a.contains(-1) assert not a.contains(4) +def test_and_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.and_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 & n2) + +def test_or_bound(): + for _, _, b1 in some_bounds(): + for _, _, b2 in some_bounds(): + b3 = b1.or_bound(b2) + for n1 in nbr: + for n2 in nbr: + if b1.contains(n1) and b2.contains(n2): + assert b3.contains(n1 | n2) + assert b3.contains(n1 ^ n2) # we use it for xor too + def test_next_pow2_m1(): assert next_pow2_m1(0) == 0 @@ -285,3 +377,82 @@ assert next_pow2_m1(80) == 127 assert next_pow2_m1((1 << 32) - 5) == (1 << 32) - 1 assert next_pow2_m1((1 << 64) - 1) == (1 << 64) - 1 + + + at given(bound_with_contained_number, bound_with_contained_number) +def test_add_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.add_bound(b2) + try: + r = 
ovfcheck(n1 + n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_sub_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + print b1, n1 + print b2, n2 + b3 = b1.sub_bound(b2) + try: + r = ovfcheck(n1 - n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mul_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mul_bound(b2) + try: + r = ovfcheck(n1 * n2) + except OverflowError: + assert not b3.bounded() + else: + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_div_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.py_div_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 / n2) # Python-style div + + at given(bound_with_contained_number, bound_with_contained_number) +def test_mod_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.mod_bound(b2) + if n1 == -sys.maxint-1 and n2 == -1: + return # overflow + if n2 != 0: + assert b3.contains(n1 % n2) # Python-style mod + + at given(bound_with_contained_number, bound_with_contained_number) +def test_and_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.and_bound(b2) + r = n1 & n2 + assert b3.contains(r) + + at given(bound_with_contained_number, bound_with_contained_number) +def test_or_bound_random(t1, t2): + b1, n1 = t1 + b2, n2 = t2 + b3 = b1.or_bound(b2) + r = n1 | n2 + assert b3.contains(r) + r = n1 ^ n2 + assert b3.contains(r) From pypy.commits at gmail.com Thu Nov 30 12:18:36 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 09:18:36 -0800 (PST) Subject: [pypy-commit] pypy mmap-for-arenas: translation fix Message-ID: <5a203d6c.020a1c0a.c7afa.edad@mx.google.com> Author: Armin Rigo Branch: mmap-for-arenas Changeset: r93224:40ad6dbda37b Date: 2017-11-30 18:18 +0100 http://bitbucket.org/pypy/pypy/changeset/40ad6dbda37b/ Log: translation fix diff --git a/rpython/rtyper/lltypesystem/llarena.py b/rpython/rtyper/lltypesystem/llarena.py --- a/rpython/rtyper/lltypesystem/llarena.py +++ b/rpython/rtyper/lltypesystem/llarena.py @@ -558,6 +558,7 @@ def llimpl_arena_munmap(arena_addr, nbytes): from rpython.rlib import rmmap + assert nbytes >= 0 rmmap.c_munmap_safe(rffi.cast(rmmap.PTR, arena_addr), nbytes) register_external(arena_munmap, [llmemory.Address, int], None, 'll_arena.arena_munmap', From pypy.commits at gmail.com Thu Nov 30 13:49:52 2017 From: pypy.commits at gmail.com (fijal) Date: Thu, 30 Nov 2017 10:49:52 -0800 (PST) Subject: [pypy-commit] pypy memory-accounting: merge mmap-for-llarena Message-ID: <5a2052d0.e4a6df0a.26087.3d34@mx.google.com> Author: fijal Branch: memory-accounting Changeset: r93225:5ba0bf0bf684 Date: 2017-11-30 20:49 +0200 http://bitbucket.org/pypy/pypy/changeset/5ba0bf0bf684/ Log: merge mmap-for-llarena diff too long, truncating to 2000 out of 18247 lines diff --git a/.hgignore b/.hgignore --- a/.hgignore +++ b/.hgignore @@ -71,6 +71,8 @@ ^lib_pypy/.+.c$ ^lib_pypy/.+.o$ ^lib_pypy/.+.so$ +^lib_pypy/.+.pyd$ +^lib_pypy/Release/ ^pypy/doc/discussion/.+\.html$ ^include/.+\.h$ ^include/.+\.inl$ diff --git a/.hgtags b/.hgtags --- a/.hgtags +++ b/.hgtags @@ -40,3 +40,7 @@ 2875f328eae2216a87f3d6f335092832eb031f56 release-pypy3.5-v5.7.1 c925e73810367cd960a32592dd7f728f436c125c release-pypy2.7-v5.8.0 a37ecfe5f142bc971a86d17305cc5d1d70abec64 
release-pypy3.5-v5.8.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +d72f9800a42b46a8056951b1da2426d2c2d8d502 release-pypy3.5-v5.9.0 +03d614975835870da65ff0481e1edad68ebbcb8d release-pypy2.7-v5.9.0 +84a2f3e6a7f88f2fe698e473998755b3bd1a12e2 release-pypy2.7-v5.9.0 diff --git a/_pytest/terminal.py b/_pytest/terminal.py --- a/_pytest/terminal.py +++ b/_pytest/terminal.py @@ -366,11 +366,11 @@ EXIT_OK, EXIT_TESTSFAILED, EXIT_INTERRUPTED, EXIT_USAGEERROR, EXIT_NOTESTSCOLLECTED) if exitstatus in summary_exit_codes: - self.config.hook.pytest_terminal_summary(terminalreporter=self) self.summary_errors() self.summary_failures() self.summary_warnings() self.summary_passes() + self.config.hook.pytest_terminal_summary(terminalreporter=self) if exitstatus == EXIT_INTERRUPTED: self._report_keyboardinterrupt() del self._keyboardinterrupt_memo diff --git a/extra_tests/requirements.txt b/extra_tests/requirements.txt new file mode 100644 --- /dev/null +++ b/extra_tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +hypothesis diff --git a/extra_tests/test_bytes.py b/extra_tests/test_bytes.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_bytes.py @@ -0,0 +1,84 @@ +from hypothesis import strategies as st +from hypothesis import given, example + +st_bytestring = st.binary() | st.binary().map(bytearray) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st_bytestring, st_bytestring, st_bytestring) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, end + + at given(st_bytestring, st_bytestring) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def test_startswith_start(u, v, start): + expected = u[start:].startswith(v) if v else (start <= len(u)) + assert u.startswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st_bytestring, st_bytestring) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(b'x', b'', 1) + at example(b'x', b'', 2) + at given(st_bytestring, st_bytestring, st.integers()) +def 
test_endswith_2(u, v, start): + expected = u[start:].endswith(v) if v else (start <= len(u)) + assert u.endswith(v, start) is expected + + at example(b'x', b'', 1, 0) + at example(b'xx', b'', -1, 0) + at given(st_bytestring, st_bytestring, st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/extra_tests/test_textio.py b/extra_tests/test_textio.py new file mode 100644 --- /dev/null +++ b/extra_tests/test_textio.py @@ -0,0 +1,48 @@ +from hypothesis import given, strategies as st + +from io import BytesIO, TextIOWrapper +import os + +def translate_newlines(text): + text = text.replace('\r\n', '\n') + text = text.replace('\r', '\n') + return text.replace('\n', os.linesep) + + at st.composite +def st_readline_universal( + draw, st_nlines=st.integers(min_value=0, max_value=10)): + n_lines = draw(st_nlines) + lines = draw(st.lists( + st.text(st.characters(blacklist_characters='\r\n')), + min_size=n_lines, max_size=n_lines)) + limits = [] + for line in lines: + limit = draw(st.integers(min_value=0, max_value=len(line) + 5)) + limits.append(limit) + limits.append(-1) + endings = draw(st.lists( + st.sampled_from(['\n', '\r', '\r\n']), + min_size=n_lines, max_size=n_lines)) + return ( + ''.join(line + ending for line, ending in zip(lines, endings)), + limits) + + at given(data=st_readline_universal(), + mode=st.sampled_from(['\r', '\n', '\r\n', '', None])) +def test_readline(data, mode): + txt, limits = data + textio = TextIOWrapper( + BytesIO(txt.encode('utf-8', 'surrogatepass')), + encoding='utf-8', errors='surrogatepass', newline=mode) + lines = [] + for limit in limits: + line = textio.readline(limit) + if limit >= 0: + assert len(line) <= limit + if line: + lines.append(line) + elif limit: + break + if mode is None: + txt = translate_newlines(txt) + assert txt.startswith(u''.join(lines)) diff --git a/extra_tests/test_unicode.py b/extra_tests/test_unicode.py --- a/extra_tests/test_unicode.py +++ b/extra_tests/test_unicode.py @@ -1,3 +1,4 @@ +import sys import pytest from hypothesis import strategies as st from hypothesis import given, settings, example @@ -32,3 +33,89 @@ @given(s=st.text()) def test_composition(s, norm1, norm2, norm3): assert normalize(norm2, normalize(norm1, s)) == normalize(norm3, s) + + at given(st.text(), st.text(), st.text()) +def test_find(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.find(u) <= len(prefix) + assert s.find(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_index(u, prefix, suffix): + s = prefix + u + suffix + assert 0 <= s.index(u) <= len(prefix) + assert s.index(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rfind(u, prefix, suffix): + s = prefix + u + suffix + assert s.rfind(u) >= len(prefix) + assert s.rfind(u, len(prefix), len(s) - len(suffix)) == len(prefix) + + at given(st.text(), st.text(), st.text()) +def test_rindex(u, prefix, suffix): + s = prefix + u + suffix + assert s.rindex(u) >= len(prefix) + assert s.rindex(u, len(prefix), len(s) - len(suffix)) == len(prefix) + +def adjust_indices(u, start, end): + if end < 0: + end = max(end + len(u), 0) + else: + end = min(end, len(u)) + if start < 0: + start = max(start + len(u), 0) + return start, 
end + + at given(st.text(), st.text()) +def test_startswith_basic(u, v): + assert u.startswith(v) is (u[:len(v)] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_startswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].startswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.startswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_startswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].startswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.startswith(v, start, end) is expected + + at given(st.text(), st.text()) +def test_endswith_basic(u, v): + if len(v) > len(u): + assert u.endswith(v) is False + else: + assert u.endswith(v) is (u[len(u) - len(v):] == v) + + at example(u'x', u'', 1) + at example(u'x', u'', 2) + at given(st.text(), st.text(), st.integers()) +def test_endswith_2(u, v, start): + if v or sys.version_info[0] == 2: + expected = u[start:].endswith(v) + else: # CPython leaks implementation details in this case + expected = start <= len(u) + assert u.endswith(v, start) is expected + + at example(u'x', u'', 1, 0) + at example(u'xx', u'', -1, 0) + at given(st.text(), st.text(), st.integers(), st.integers()) +def test_endswith_3(u, v, start, end): + if v or sys.version_info[0] == 2: + expected = u[start:end].endswith(v) + else: # CPython leaks implementation details in this case + start0, end0 = adjust_indices(u, start, end) + expected = start0 <= len(u) and start0 <= end0 + assert u.endswith(v, start, end) is expected diff --git a/lib-python/2.7/ctypes/__init__.py b/lib-python/2.7/ctypes/__init__.py --- a/lib-python/2.7/ctypes/__init__.py +++ b/lib-python/2.7/ctypes/__init__.py @@ -360,14 +360,15 @@ self._FuncPtr = _FuncPtr if handle is None: - if flags & _FUNCFLAG_CDECL: - pypy_dll = _ffi.CDLL(name, mode) - else: - pypy_dll = _ffi.WinDLL(name, mode) - self.__pypy_dll__ = pypy_dll - handle = int(pypy_dll) - if _sys.maxint > 2 ** 32: - handle = int(handle) # long -> int + handle = 0 + if flags & _FUNCFLAG_CDECL: + pypy_dll = _ffi.CDLL(name, mode, handle) + else: + pypy_dll = _ffi.WinDLL(name, mode, handle) + self.__pypy_dll__ = pypy_dll + handle = int(pypy_dll) + if _sys.maxint > 2 ** 32: + handle = int(handle) # long -> int self._handle = handle def __repr__(self): diff --git a/lib-python/2.7/inspect.py b/lib-python/2.7/inspect.py --- a/lib-python/2.7/inspect.py +++ b/lib-python/2.7/inspect.py @@ -40,6 +40,10 @@ import linecache from operator import attrgetter from collections import namedtuple +try: + from cpyext import is_cpyext_function as _is_cpyext_function +except ImportError: + _is_cpyext_function = lambda obj: False # These constants are from Include/code.h. 
CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS = 0x1, 0x2, 0x4, 0x8 @@ -230,7 +234,7 @@ __doc__ documentation string __name__ original name of this function or method __self__ instance to which a method is bound, or None""" - return isinstance(object, types.BuiltinFunctionType) + return isinstance(object, types.BuiltinFunctionType) or _is_cpyext_function(object) def isroutine(object): """Return true if the object is any kind of function or method.""" diff --git a/lib-python/2.7/test/test_urllib2net.py b/lib-python/2.7/test/test_urllib2net.py --- a/lib-python/2.7/test/test_urllib2net.py +++ b/lib-python/2.7/test/test_urllib2net.py @@ -286,7 +286,7 @@ self.assertEqual(u.fp._sock.fp._sock.gettimeout(), 120) u.close() - FTP_HOST = 'ftp://ftp.debian.org/debian/' + FTP_HOST = 'ftp://www.pythontest.net/' def test_ftp_basic(self): self.assertIsNone(socket.getdefaulttimeout()) diff --git a/lib-python/2.7/warnings.py b/lib-python/2.7/warnings.py --- a/lib-python/2.7/warnings.py +++ b/lib-python/2.7/warnings.py @@ -43,11 +43,12 @@ unicodetype = unicode except NameError: unicodetype = () + template = "%s: %s: %s\n" try: message = str(message) except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) + template = unicode(template) + s = template % (lineno, category.__name__, message) line = linecache.getline(filename, lineno) if line is None else line if line: line = line.strip() diff --git a/lib_pypy/_ctypes/array.py b/lib_pypy/_ctypes/array.py --- a/lib_pypy/_ctypes/array.py +++ b/lib_pypy/_ctypes/array.py @@ -8,60 +8,63 @@ class ArrayMeta(_CDataMeta): def __new__(self, name, cls, typedict): res = type.__new__(self, name, cls, typedict) - if '_type_' in typedict: - ffiarray = _rawffi.Array(typedict['_type_']._ffishape_) - res._ffiarray = ffiarray - subletter = getattr(typedict['_type_'], '_type_', None) - if subletter == 'c': - def getvalue(self): - return _rawffi.charp2string(self._buffer.buffer, - self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, str): - _rawffi.rawstring2charp(self._buffer.buffer, val) - else: - for i in range(len(val)): - self[i] = val[i] - if len(val) < self._length_: - self._buffer[len(val)] = '\x00' - res.value = property(getvalue, setvalue) - def getraw(self): - return _rawffi.charp2rawstring(self._buffer.buffer, - self._length_) + if cls == (_CData,): # this is the Array class defined below + res._ffiarray = None + return res + if not hasattr(res, '_length_') or not isinstance(res._length_, int): + raise AttributeError( + "class must define a '_length_' attribute, " + "which must be a positive integer") + ffiarray = res._ffiarray = _rawffi.Array(res._type_._ffishape_) + subletter = getattr(res._type_, '_type_', None) + if subletter == 'c': + def getvalue(self): + return _rawffi.charp2string(self._buffer.buffer, + self._length_) + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, str): + _rawffi.rawstring2charp(self._buffer.buffer, val) + else: + for i in range(len(val)): + self[i] = val[i] + if len(val) < self._length_: + self._buffer[len(val)] = b'\x00' + res.value = property(getvalue, setvalue) - def setraw(self, buffer): - if len(buffer) > self._length_: - raise ValueError("%r too long" % (buffer,)) - _rawffi.rawstring2charp(self._buffer.buffer, buffer) - res.raw = property(getraw, setraw) - elif 
subletter == 'u': - def getvalue(self): - return _rawffi.wcharp2unicode(self._buffer.buffer, - self._length_) + def getraw(self): + return _rawffi.charp2rawstring(self._buffer.buffer, + self._length_) - def setvalue(self, val): - # we don't want to have buffers here - if len(val) > self._length_: - raise ValueError("%r too long" % (val,)) - if isinstance(val, unicode): - target = self._buffer - else: - target = self - for i in range(len(val)): - target[i] = val[i] - if len(val) < self._length_: - target[len(val)] = u'\x00' - res.value = property(getvalue, setvalue) - - if '_length_' in typedict: - res._ffishape_ = (ffiarray, typedict['_length_']) - res._fficompositesize_ = res._sizeofinstances() - else: - res._ffiarray = None + def setraw(self, buffer): + if len(buffer) > self._length_: + raise ValueError("%r too long" % (buffer,)) + _rawffi.rawstring2charp(self._buffer.buffer, buffer) + res.raw = property(getraw, setraw) + elif subletter == 'u': + def getvalue(self): + return _rawffi.wcharp2unicode(self._buffer.buffer, + self._length_) + + def setvalue(self, val): + # we don't want to have buffers here + if len(val) > self._length_: + raise ValueError("%r too long" % (val,)) + if isinstance(val, unicode): + target = self._buffer + else: + target = self + for i in range(len(val)): + target[i] = val[i] + if len(val) < self._length_: + target[len(val)] = u'\x00' + res.value = property(getvalue, setvalue) + + res._ffishape_ = (ffiarray, res._length_) + res._fficompositesize_ = res._sizeofinstances() return res from_address = cdata_from_address @@ -156,7 +159,7 @@ l = [self[i] for i in range(start, stop, step)] letter = getattr(self._type_, '_type_', None) if letter == 'c': - return "".join(l) + return b"".join(l) if letter == 'u': return u"".join(l) return l diff --git a/lib_pypy/_ctypes/basics.py b/lib_pypy/_ctypes/basics.py --- a/lib_pypy/_ctypes/basics.py +++ b/lib_pypy/_ctypes/basics.py @@ -176,6 +176,10 @@ def _get_buffer_value(self): return self._buffer[0] + def _copy_to(self, addr): + target = type(self).from_address(addr)._buffer + target[0] = self._get_buffer_value() + def _to_ffi_param(self): if self.__class__._is_pointer_like(): return self._get_buffer_value() diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py --- a/lib_pypy/_ctypes/pointer.py +++ b/lib_pypy/_ctypes/pointer.py @@ -114,7 +114,9 @@ cobj = self._type_.from_param(value) if ensure_objects(cobj) is not None: store_reference(self, index, cobj._objects) - self._subarray(index)[0] = cobj._get_buffer_value() + address = self._buffer[0] + address += index * sizeof(self._type_) + cobj._copy_to(address) def __nonzero__(self): return self._buffer[0] != 0 diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py --- a/lib_pypy/_ctypes/structure.py +++ b/lib_pypy/_ctypes/structure.py @@ -291,6 +291,11 @@ def _get_buffer_value(self): return self._buffer.buffer + def _copy_to(self, addr): + from ctypes import memmove + origin = self._get_buffer_value() + memmove(addr, origin, self._fficompositesize_) + def _to_ffi_param(self): return self._buffer diff --git a/lib_pypy/_ctypes_test.py b/lib_pypy/_ctypes_test.py --- a/lib_pypy/_ctypes_test.py +++ b/lib_pypy/_ctypes_test.py @@ -21,5 +21,11 @@ with fp: imp.load_module('_ctypes_test', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass print('could not find _ctypes_test in %s' % output_dir) _pypy_testcapi.compile_shared('_ctypes_test.c', 
'_ctypes_test', output_dir) diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py --- a/lib_pypy/_sqlite3.py +++ b/lib_pypy/_sqlite3.py @@ -1028,21 +1028,25 @@ if '\0' in sql: raise ValueError("the query contains a null character") - first_word = sql.lstrip().split(" ")[0].upper() - if first_word == "": + + if sql: + first_word = sql.lstrip().split()[0].upper() + if first_word == '': + self._type = _STMT_TYPE_INVALID + if first_word == "SELECT": + self._type = _STMT_TYPE_SELECT + elif first_word == "INSERT": + self._type = _STMT_TYPE_INSERT + elif first_word == "UPDATE": + self._type = _STMT_TYPE_UPDATE + elif first_word == "DELETE": + self._type = _STMT_TYPE_DELETE + elif first_word == "REPLACE": + self._type = _STMT_TYPE_REPLACE + else: + self._type = _STMT_TYPE_OTHER + else: self._type = _STMT_TYPE_INVALID - elif first_word == "SELECT": - self._type = _STMT_TYPE_SELECT - elif first_word == "INSERT": - self._type = _STMT_TYPE_INSERT - elif first_word == "UPDATE": - self._type = _STMT_TYPE_UPDATE - elif first_word == "DELETE": - self._type = _STMT_TYPE_DELETE - elif first_word == "REPLACE": - self._type = _STMT_TYPE_REPLACE - else: - self._type = _STMT_TYPE_OTHER if isinstance(sql, unicode): sql = sql.encode('utf-8') diff --git a/lib_pypy/_testcapi.py b/lib_pypy/_testcapi.py --- a/lib_pypy/_testcapi.py +++ b/lib_pypy/_testcapi.py @@ -16,4 +16,10 @@ with fp: imp.load_module('_testcapi', fp, filename, description) except ImportError: + if os.name == 'nt': + # hack around finding compilers on win32 + try: + import setuptools + except ImportError: + pass _pypy_testcapi.compile_shared(cfile, '_testcapi', output_dir) diff --git a/lib_pypy/_tkinter/app.py b/lib_pypy/_tkinter/app.py --- a/lib_pypy/_tkinter/app.py +++ b/lib_pypy/_tkinter/app.py @@ -119,7 +119,7 @@ tklib.TCL_GLOBAL_ONLY) # This is used to get the application class for Tk 4.1 and up - argv0 = className.lower() + argv0 = className.lower().encode('ascii') tklib.Tcl_SetVar(self.interp, "argv0", argv0, tklib.TCL_GLOBAL_ONLY) @@ -180,6 +180,9 @@ if err == tklib.TCL_ERROR: self.raiseTclError() + def interpaddr(self): + return int(tkffi.cast('size_t', self.interp)) + def _var_invoke(self, func, *args, **kwargs): if self.threaded and self.thread_id != tklib.Tcl_GetCurrentThread(): # The current thread is not the interpreter thread. diff --git a/lib_pypy/cffi.egg-info/PKG-INFO b/lib_pypy/cffi.egg-info/PKG-INFO --- a/lib_pypy/cffi.egg-info/PKG-INFO +++ b/lib_pypy/cffi.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: cffi -Version: 1.11.1 +Version: 1.11.2 Summary: Foreign Function Interface for Python calling C code. Home-page: http://cffi.readthedocs.org Author: Armin Rigo, Maciej Fijalkowski diff --git a/lib_pypy/cffi/__init__.py b/lib_pypy/cffi/__init__.py --- a/lib_pypy/cffi/__init__.py +++ b/lib_pypy/cffi/__init__.py @@ -4,8 +4,8 @@ from .api import FFI from .error import CDefError, FFIError, VerificationError, VerificationMissing -__version__ = "1.11.1" -__version_info__ = (1, 11, 1) +__version__ = "1.11.2" +__version_info__ = (1, 11, 2) # The verifier module file names are based on the CRC32 of a string that # contains the following version number. 
It may be older than __version__ diff --git a/lib_pypy/cffi/_cffi_include.h b/lib_pypy/cffi/_cffi_include.h --- a/lib_pypy/cffi/_cffi_include.h +++ b/lib_pypy/cffi/_cffi_include.h @@ -238,9 +238,9 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char16_t(uint16_t x) { if (sizeof(_cffi_wchar_t) == 2) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else - return _cffi_from_c_wchar3216_t(x); + return _cffi_from_c_wchar3216_t((int)x); } _CFFI_UNUSED_FN static int _cffi_to_c_char32_t(PyObject *o) @@ -254,7 +254,7 @@ _CFFI_UNUSED_FN static PyObject *_cffi_from_c_char32_t(int x) { if (sizeof(_cffi_wchar_t) == 4) - return _cffi_from_c_wchar_t(x); + return _cffi_from_c_wchar_t((_cffi_wchar_t)x); else return _cffi_from_c_wchar3216_t(x); } diff --git a/lib_pypy/cffi/_embedding.h b/lib_pypy/cffi/_embedding.h --- a/lib_pypy/cffi/_embedding.h +++ b/lib_pypy/cffi/_embedding.h @@ -247,7 +247,7 @@ if (f != NULL && f != Py_None) { PyFile_WriteString("\nFrom: " _CFFI_MODULE_NAME - "\ncompiled with cffi version: 1.11.1" + "\ncompiled with cffi version: 1.11.2" "\n_cffi_backend module: ", f); modules = PyImport_GetModuleDict(); mod = PyDict_GetItemString(modules, "_cffi_backend"); diff --git a/pypy/doc/build.rst b/pypy/doc/build.rst --- a/pypy/doc/build.rst +++ b/pypy/doc/build.rst @@ -119,7 +119,7 @@ To run untranslated tests, you need the Boehm garbage collector libgc. -On recent Debian and Ubuntu (like 17.04), this is the command to install +On recent Debian and Ubuntu (16.04 onwards), this is the command to install all build-time dependencies:: apt-get install gcc make libffi-dev pkg-config zlib1g-dev libbz2-dev \ @@ -127,7 +127,7 @@ tk-dev libgc-dev python-cffi \ liblzma-dev libncursesw5-dev # these two only needed on PyPy3 -On older Debian and Ubuntu (12.04 to 16.04):: +On older Debian and Ubuntu (12.04-14.04):: apt-get install gcc make libffi-dev pkg-config libz-dev libbz2-dev \ libsqlite3-dev libncurses-dev libexpat1-dev libssl-dev libgdbm-dev \ @@ -149,12 +149,23 @@ xz-devel # For lzma on PyPy3. (XXX plus the SLES11 version of libgdbm-dev and tk-dev) -On Mac OS X, most of these build-time dependencies are installed alongside +On Mac OS X:: + +Most of these build-time dependencies are installed alongside the Developer Tools. However, note that in order for the installation to find them you may need to run:: xcode-select --install +An exception is OpenSSL, which is no longer provided with the operating +system. It can be obtained via Homebrew (with ``$ brew install openssl``), +but it will not be available on the system path by default. The easiest +way to enable it for building pypy is to set an environment variable:: + + export PKG_CONFIG_PATH=$(brew --prefix)/opt/openssl/lib/pkgconfig + +After setting this, translation (described next) will find the OpenSSL libs +as expected. Run the translation ------------------- @@ -187,18 +198,18 @@ entire pypy interpreter. This step is currently singe threaded, and RAM hungry. As part of this step, the chain creates a large number of C code files and a Makefile to compile them in a - directory controlled by the ``PYPY_USESSION_DIR`` environment variable. + directory controlled by the ``PYPY_USESSION_DIR`` environment variable. 2. Create an executable ``pypy-c`` by running the Makefile. This step can - utilize all possible cores on the machine. -3. Copy the needed binaries to the current directory. -4. Generate c-extension modules for any cffi-based stdlib modules. + utilize all possible cores on the machine. +3. 
Copy the needed binaries to the current directory. +4. Generate c-extension modules for any cffi-based stdlib modules. The resulting executable behaves mostly like a normal Python interpreter (see :doc:`cpython_differences`), and is ready for testing, for use as a base interpreter for a new virtualenv, or for packaging into a binary suitable for installation on another machine running the same OS as the build -machine. +machine. Note that step 4 is merely done as a convenience, any of the steps may be rerun without rerunning the previous steps. @@ -255,7 +266,7 @@ * PyPy 2.5.1 or earlier: normal users would see permission errors. Installers need to run ``pypy -c "import gdbm"`` and other similar - commands at install time; the exact list is in + commands at install time; the exact list is in :source:`pypy/tool/release/package.py `. Users seeing a broken installation of PyPy can fix it after-the-fact if they have sudo rights, by running once e.g. ``sudo pypy -c "import gdbm``. diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst --- a/pypy/doc/faq.rst +++ b/pypy/doc/faq.rst @@ -182,6 +182,57 @@ technical difficulties. +What about numpy, numpypy, micronumpy? +-------------------------------------- + +Way back in 2011, the PyPy team `started to reimplement`_ numpy in PyPy. It +has two pieces: + + * the builtin module :source:`pypy/module/micronumpy`: this is written in + RPython and roughly covers the content of the ``numpy.core.multiarray`` + module. Confusingly enough, this is available in PyPy under the name + ``_numpypy``. It is included by default in all the official releases of + PyPy (but it might be dropped in the future). + + * a fork_ of the official numpy repository maintained by us and informally + called ``numpypy``: even more confusing, the name of the repo on bitbucket + is ``numpy``. The main difference with the upstream numpy, is that it is + based on the micronumpy module written in RPython, instead of of + ``numpy.core.multiarray`` which is written in C. + +Moreover, it is also possible to install the upstream version of ``numpy``: +its core is written in C and it runs on PyPy under the cpyext compatibility +layer. This is what you get if you do ``pypy -m pip install numpy``. + + +Should I install numpy or numpypy? +----------------------------------- + +TL;DR version: you should use numpy. You can install it by doing ``pypy -m pip +install numpy``. You might also be interested in using the experimental `PyPy +binary wheels`_ to save compilation time. + +The upstream ``numpy`` is written in C, and runs under the cpyext +compatibility layer. Nowadays, cpyext is mature enough that you can simply +use the upstream ``numpy``, since it passes 99.9% of the test suite. At the +moment of writing (October 2017) the main drawback of ``numpy`` is that cpyext +is infamously slow, and thus it has worse performance compared to +``numpypy``. However, we are actively working on improving it, as we expect to +reach the same speed, eventually. + +On the other hand, ``numpypy`` is more JIT-friendly and very fast to call, +since it is written in RPython: but it is a reimplementation, and it's hard to +be completely compatible: over the years the project slowly matured and +eventually it was able to call out to the LAPACK and BLAS libraries to speed +matrix calculations, and reached around an 80% parity with the upstream +numpy. However, 80% is far from 100%. Since cpyext/numpy compatibility is +progressing fast, we have discontinued support for ``numpypy``. + +.. 
_`started to reimplement`: https://morepypy.blogspot.co.il/2011/05/numpy-in-pypy-status-and-roadmap.html +.. _fork: https://bitbucket.org/pypy/numpy +.. _`PyPy binary wheels`: https://github.com/antocuni/pypy-wheels + + Is PyPy more clever than CPython about Tail Calls? -------------------------------------------------- diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst --- a/pypy/doc/project-ideas.rst +++ b/pypy/doc/project-ideas.rst @@ -240,9 +240,12 @@ **matplotlib** https://github.com/matplotlib/matplotlib - TODO: the tkagg backend does not work, which makes tests fail on downstream - projects like Pandas, SciPy. It uses id(obj) as a c-pointer to obj in - tkagg.py, which requires refactoring + Status: using the matplotlib branch of PyPy and the tkagg-cffi branch of + matplotlib from https://github.com/mattip/matplotlib/tree/tkagg-cffi, the + tkagg backend can function. + + TODO: the matplotlib branch passes numpy arrays by value (copying all the + data), this proof-of-concept needs help to become completely compliant **wxPython** https://bitbucket.org/amauryfa/wxpython-cffi diff --git a/pypy/doc/release-v5.9.0.rst b/pypy/doc/release-v5.9.0.rst --- a/pypy/doc/release-v5.9.0.rst +++ b/pypy/doc/release-v5.9.0.rst @@ -10,18 +10,24 @@ This new PyPy2.7 release includes the upstream stdlib version 2.7.13, and PyPy3.5 includes the upstream stdlib version 3.5.3. -Only a handful of failing tests remain in NumPy and Pandas on PyPy2.7, issues -that appeared as excessive memory use were cleared up and other incompatibilities -were resolved. +NumPy and Pandas now work on PyPy2.7 (together with Cython 0.27.1). Issues +that appeared as excessive memory +use were cleared up and other incompatibilities were resolved. The C-API +compatibility layer does slow down code which crosses the python-c interface +often, we have ideas on how it could be improved, and still recommend +using pure python on PyPy or interfacing via CFFI_. Many other modules +based on C-API exentions now work on PyPy as well. -Cython 0.27 (released last week) should support more projects with PyPy, both -on PyPy2.7 and PyPy3.5 beta. +Cython 0.27.1 (released very recently) supports more projects with PyPy, both +on PyPy2.7 and PyPy3.5 beta. Note version **0.27.1** is now the minimum +version that supports this version of PyPy, due to some interactions with +updated C-API interface code. We optimized the JSON parser for recurring string keys, which should decrease memory use to 50% and increase parsing speed by up to 15% for large JSON files with many repeating dictionary keys (which is quite common). -CFFI_, which is part of the PyPy release, has been updated to 1.11, +CFFI_, which is part of the PyPy release, has been updated to 1.11.1, improving an already great package for interfacing with C. CFFI now supports complex arguments in API mode, as well as ``char16_t`` and ``char32_t`` and has improved support for callbacks. 
@@ -145,6 +151,7 @@ * Issue 2590_: fix the bounds in the GC when allocating a lot of objects with finalizers * Replace magical NOT RPYTHON comment with a decorator * Implement ``socket.sendmsg()``/``.recvmsg()`` for py3.5 + * Add ``memory_pressure`` for ``_SSLSocket`` objects * Degredations @@ -163,7 +170,8 @@ * Add support for ``_PyNamespace_New``, ``PyMemoryView_FromMemory``, ``Py_EnterRecursiveCall`` raising RecursionError, ``PyObject_LengthHint``, - ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject`` + ``PyUnicode_FromKindAndData``, ``PyDict_SetDefault``, ``PyGenObject``, + ``PyGenObject``, ``PyUnicode_Substring``, ``PyLong_FromUnicodeObject`` * Implement ``PyType_FromSpec`` (PEP 384) and fix issues with PEP 489 support * Support the new version of ``os.stat()`` on win32 * Use ``stat3()`` on Posix diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst --- a/pypy/doc/whatsnew-head.rst +++ b/pypy/doc/whatsnew-head.rst @@ -3,4 +3,29 @@ =========================== .. this is a revision shortly after release-pypy2.7-v5.9.0 -.. startrev:899e5245de1e +.. startrev:d56dadcef996 + +.. branch: cppyy-packaging +Cleanup and improve cppyy packaging + +.. branch: docs-osx-brew-openssl + +.. branch: keep-debug-symbols +Add a smartstrip tool, which can optionally keep the debug symbols in a +separate file, instead of just stripping them away. Use it in packaging + +.. branch: bsd-patches +Fix failures on FreeBSD, contributed by David Naylor as patches on the issue +tracker (issues 2694, 2695, 2696, 2697) + +.. branch: run-extra-tests +Run extra_tests/ in buildbot + +.. branch: vmprof-0.4.10 +Upgrade the _vmprof backend to vmprof 0.4.10 + +.. branch: fix-vmprof-stacklet-switch +Fix a vmprof+continulets (i.e. greenelts, eventlet, gevent, ...) + +.. branch: win32-vcvars + diff --git a/pypy/doc/whatsnew-pypy2-5.9.0.rst b/pypy/doc/whatsnew-pypy2-5.9.0.rst --- a/pypy/doc/whatsnew-pypy2-5.9.0.rst +++ b/pypy/doc/whatsnew-pypy2-5.9.0.rst @@ -85,3 +85,12 @@ .. branch: py_ssize_t Explicitly use Py_ssize_t as the Signed type in pypy c-api + +.. branch: cpyext-jit + +Differentiate the code to call METH_NOARGS, METH_O and METH_VARARGS in cpyext: +this allows to write specialized code which is much faster than previous +completely generic version. Moreover, let the JIT to look inside the cpyext +module: the net result is that cpyext calls are up to 7x faster. However, this +is true only for very simple situations: in all real life code, we are still +much slower than CPython (more optimizations to come) diff --git a/pypy/doc/windows.rst b/pypy/doc/windows.rst --- a/pypy/doc/windows.rst +++ b/pypy/doc/windows.rst @@ -25,8 +25,10 @@ This compiler, while the standard one for Python 2.7, is deprecated. Microsoft has made it available as the `Microsoft Visual C++ Compiler for Python 2.7`_ (the link -was checked in Nov 2016). Note that the compiler suite will be installed in -``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python``. +was checked in Nov 2016). Note that the compiler suite may be installed in +``C:\Users\\AppData\Local\Programs\Common\Microsoft\Visual C++ for Python`` +or in +``C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python``. A current version of ``setuptools`` will be able to find it there. 
For Windows 10, you must right-click the download, and under ``Properties`` -> ``Compatibility`` mark it as ``Run run this program in comatibility mode for`` @@ -41,7 +43,6 @@ ----------------------------------- We routinely test translation using v9, also known as Visual Studio 2008. -Our buildbot is still using the Express Edition, not the compiler noted above. Other configurations may work as well. The translation scripts will set up the appropriate environment variables @@ -81,6 +82,30 @@ .. _build instructions: http://pypy.org/download.html#building-from-source +Setting Up Visual Studio for building SSL in Python3 +---------------------------------------------------- + +On Python3, the ``ssl`` module is based on ``cffi``, and requires a build step after +translation. However ``distutils`` does not support the Micorosft-provided Visual C +compiler, and ``cffi`` depends on ``distutils`` to find the compiler. The +traditional solution to this problem is to install the ``setuptools`` module +via running ``-m ensurepip`` which installs ``pip`` and ``setuptools``. However +``pip`` requires ``ssl``. So we have a chicken-and-egg problem: ``ssl`` depends on +``cffi`` which depends on ``setuptools``, which depends on ``ensurepip``, which +depends on ``ssl``. + +In order to solve this, the buildbot sets an environment varaible that helps +``distutils`` find the compiler without ``setuptools``:: + + set VS90COMNTOOLS=C:\Program Files (x86)\Common Files\Microsoft\Visual C++ for Python\9.0\VC\bin + +or whatever is appropriate for your machine. Note that this is not enough, you +must also copy the ``vcvarsall.bat`` file fron the ``...\9.0`` directory to the +``...\9.0\VC`` directory, and edit it, changing the lines that set +``VCINSTALLDIR`` and ``WindowsSdkDir``:: + set VCINSTALLDIR=%~dp0\ + set WindowsSdkDir=%~dp0\..\WinSDK\ + Preparing Windows for the large build ------------------------------------- diff --git a/pypy/goal/getnightly.py b/pypy/goal/getnightly.py --- a/pypy/goal/getnightly.py +++ b/pypy/goal/getnightly.py @@ -15,7 +15,7 @@ arch = 'linux' cmd = 'wget "%s"' TAR_OPTIONS += ' --wildcards' - binfiles = "'*/bin/pypy' '*/bin/libpypy-c.so'" + binfiles = "'*/bin/pypy*' '*/bin/libpypy-c.so*'" if os.uname()[-1].startswith('arm'): arch += '-armhf-raspbian' elif sys.platform.startswith('darwin'): diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py --- a/pypy/interpreter/pyparser/future.py +++ b/pypy/interpreter/pyparser/future.py @@ -85,13 +85,17 @@ # permissive parsing of the given list of tokens; it relies on # the real parsing done afterwards to give errors. 
it.skip_newlines() - it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") - if it.skip(pygram.tokens.STRING): - it.skip_newlines() - while (it.skip_name("from") and + docstring_possible = True + while True: + it.skip_name("r") or it.skip_name("u") or it.skip_name("ru") + if docstring_possible and it.skip(pygram.tokens.STRING): + it.skip_newlines() + docstring_possible = False + if not (it.skip_name("from") and it.skip_name("__future__") and it.skip_name("import")): + break it.skip(pygram.tokens.LPAR) # optionally # return in 'last_position' any line-column pair that points # somewhere inside the last __future__ import statement diff --git a/pypy/interpreter/pyparser/test/test_future.py b/pypy/interpreter/pyparser/test/test_future.py --- a/pypy/interpreter/pyparser/test/test_future.py +++ b/pypy/interpreter/pyparser/test/test_future.py @@ -208,3 +208,13 @@ 'from __future__ import with_statement;') f = run(s, (2, 23)) assert f == fut.CO_FUTURE_DIVISION | fut.CO_FUTURE_WITH_STATEMENT + +def test_future_doc_future(): + # for some reason people do this :-[ + s = ''' +from __future__ import generators +"Docstring" +from __future__ import division + ''' + f = run(s, (4, 24)) + assert f == fut.CO_FUTURE_DIVISION | fut.CO_GENERATOR_ALLOWED diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py --- a/pypy/module/_cffi_backend/__init__.py +++ b/pypy/module/_cffi_backend/__init__.py @@ -3,7 +3,7 @@ from rpython.rlib import rdynload, clibffi from rpython.rtyper.lltypesystem import rffi -VERSION = "1.11.1" +VERSION = "1.11.2" FFI_DEFAULT_ABI = clibffi.FFI_DEFAULT_ABI try: diff --git a/pypy/module/_cffi_backend/cffi1_module.py b/pypy/module/_cffi_backend/cffi1_module.py --- a/pypy/module/_cffi_backend/cffi1_module.py +++ b/pypy/module/_cffi_backend/cffi1_module.py @@ -1,4 +1,5 @@ from rpython.rtyper.lltypesystem import lltype, rffi +from rpython.rlib import jit from pypy.interpreter.error import oefmt from pypy.interpreter.module import Module @@ -15,7 +16,7 @@ INITFUNCPTR = lltype.Ptr(lltype.FuncType([rffi.VOIDPP], lltype.Void)) - + at jit.dont_look_inside def load_cffi1_module(space, name, path, initptr): # This is called from pypy.module.cpyext.api.load_extension_module() from pypy.module._cffi_backend.call_python import get_ll_cffi_call_python diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py --- a/pypy/module/_cffi_backend/ctypeptr.py +++ b/pypy/module/_cffi_backend/ctypeptr.py @@ -156,10 +156,11 @@ class W_CTypePtrBase(W_CTypePtrOrArray): # base class for both pointers and pointers-to-functions - _attrs_ = ['is_void_ptr', 'is_voidchar_ptr'] - _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr'] + _attrs_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] + _immutable_fields_ = ['is_void_ptr', 'is_voidchar_ptr', 'is_onebyte_ptr'] is_void_ptr = False is_voidchar_ptr = False + is_onebyte_ptr = False def convert_to_object(self, cdata): ptrdata = rffi.cast(rffi.CCHARPP, cdata)[0] @@ -179,12 +180,20 @@ if self.is_void_ptr or other.is_void_ptr: pass # cast from or to 'void *' elif self.is_voidchar_ptr or other.is_voidchar_ptr: - space = self.space - msg = ("implicit cast from '%s' to '%s' " - "will be forbidden in the future (check that the types " - "are as you expect; use an explicit ffi.cast() if they " - "are correct)" % (other.name, self.name)) - space.warn(space.newtext(msg), space.w_UserWarning) + # for backward compatibility, accept "char *" as either + # source of target. 
This is not what C does, though, + # so emit a warning that will eventually turn into an + # error. The warning is turned off if both types are + # pointers to single bytes. + if self.is_onebyte_ptr and other.is_onebyte_ptr: + pass # no warning + else: + space = self.space + msg = ("implicit cast from '%s' to '%s' " + "will be forbidden in the future (check that the types " + "are as you expect; use an explicit ffi.cast() if they " + "are correct)" % (other.name, self.name)) + space.warn(space.newtext(msg), space.w_UserWarning) else: raise self._convert_error("compatible pointer", w_ob) @@ -214,6 +223,7 @@ self.is_void_ptr = isinstance(ctitem, ctypevoid.W_CTypeVoid) self.is_voidchar_ptr = (self.is_void_ptr or isinstance(ctitem, ctypeprim.W_CTypePrimitiveChar)) + self.is_onebyte_ptr = (ctitem.size == 1) W_CTypePtrBase.__init__(self, space, size, extra, 2, ctitem) def newp(self, w_init, allocator): diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py --- a/pypy/module/_cffi_backend/test/_backend_test_c.py +++ b/pypy/module/_cffi_backend/test/_backend_test_c.py @@ -1,7 +1,7 @@ # ____________________________________________________________ import sys -assert __version__ == "1.11.1", ("This test_c.py file is for testing a version" +assert __version__ == "1.11.2", ("This test_c.py file is for testing a version" " of cffi that differs from the one that we" " get from 'import _cffi_backend'") if sys.version_info < (3,): @@ -2099,7 +2099,8 @@ if sys.platform.startswith("linux"): BWChar = new_primitive_type("wchar_t") assert sizeof(BWChar) == 4 - assert int(cast(BWChar, -1)) == -1 # signed, on linux + # wchar_t is often signed on Linux, but not always (e.g. on ARM) + assert int(cast(BWChar, -1)) in (-1, 4294967295) def test_char16(): BChar16 = new_primitive_type("char16_t") @@ -3903,9 +3904,11 @@ BCharP = new_pointer_type(new_primitive_type("char")) BIntP = new_pointer_type(new_primitive_type("int")) BVoidP = new_pointer_type(new_void_type()) + BUCharP = new_pointer_type(new_primitive_type("unsigned char")) z1 = cast(BCharP, 0) z2 = cast(BIntP, 0) z3 = cast(BVoidP, 0) + z4 = cast(BUCharP, 0) with warnings.catch_warnings(record=True) as w: newp(new_pointer_type(BIntP), z1) # warn assert len(w) == 1 @@ -3919,6 +3922,12 @@ assert len(w) == 2 newp(new_pointer_type(BIntP), z3) # fine assert len(w) == 2 + newp(new_pointer_type(BCharP), z4) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z1) # fine (ignore signedness here) + assert len(w) == 2 + newp(new_pointer_type(BUCharP), z3) # fine + assert len(w) == 2 # check that the warnings are associated with lines in this file assert w[1].lineno == w[0].lineno + 4 diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py --- a/pypy/module/_codecs/interp_codecs.py +++ b/pypy/module/_codecs/interp_codecs.py @@ -66,20 +66,17 @@ "position %d from error handler out of bounds", newpos) replace = space.unicode_w(w_replace) - return replace, newpos + if decode: + return replace, newpos + else: + return replace, None, newpos return call_errorhandler def make_decode_errorhandler(self, space): return self._make_errorhandler(space, True) def make_encode_errorhandler(self, space): - errorhandler = self._make_errorhandler(space, False) - def encode_call_errorhandler(errors, encoding, reason, input, startpos, - endpos): - replace, newpos = errorhandler(errors, encoding, reason, input, - startpos, endpos) - return replace, None, newpos - return 
encode_call_errorhandler + return self._make_errorhandler(space, False) def get_unicodedata_handler(self, space): if self.unicodedata_handler: diff --git a/pypy/module/_continuation/test/conftest.py b/pypy/module/_continuation/test/conftest.py new file mode 100644 --- /dev/null +++ b/pypy/module/_continuation/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sys + +def pytest_configure(config): + if sys.platform.startswith('linux'): + from rpython.rlib.rvmprof.cintf import configure_libbacktrace_linux + configure_libbacktrace_linux() diff --git a/pypy/module/_continuation/test/test_stacklet.py b/pypy/module/_continuation/test/test_stacklet.py --- a/pypy/module/_continuation/test/test_stacklet.py +++ b/pypy/module/_continuation/test/test_stacklet.py @@ -8,6 +8,35 @@ cls.w_translated = cls.space.wrap( os.path.join(os.path.dirname(__file__), 'test_translated.py')) + cls.w_stack = cls.space.appexec([], """(): + import sys + def stack(f=None): + ''' + get the call-stack of the caller or the specified frame + ''' + if f is None: + f = sys._getframe(1) + res = [] + seen = set() + while f: + if f in seen: + # frame cycle + res.append('...') + break + if f.f_code.co_name == 'runtest': + # if we are running with -A, cut all the stack above + # the test function + break + seen.add(f) + res.append(f.f_code.co_name) + f = f.f_back + #print res + return res + return stack + """) + if cls.runappdirect: + # make sure that "self.stack" does not pass the self + cls.w_stack = staticmethod(cls.w_stack.im_func) def test_new_empty(self): from _continuation import continulet @@ -290,66 +319,100 @@ def test_random_switching(self): from _continuation import continulet # + seen = [] + # def t1(c1): - return c1.switch() + seen.append(3) + res = c1.switch() + seen.append(6) + return res + # def s1(c1, n): + seen.append(2) assert n == 123 c2 = t1(c1) - return c1.switch('a') + 1 + seen.append(7) + res = c1.switch('a') + 1 + seen.append(10) + return res # def s2(c2, c1): + seen.append(5) res = c1.switch(c2) + seen.append(8) assert res == 'a' - return c2.switch('b') + 2 + res = c2.switch('b') + 2 + seen.append(12) + return res # def f(): + seen.append(1) c1 = continulet(s1, 123) c2 = continulet(s2, c1) c1.switch() + seen.append(4) res = c2.switch() + seen.append(9) assert res == 'b' res = c1.switch(1000) + seen.append(11) assert res == 1001 - return c2.switch(2000) + res = c2.switch(2000) + seen.append(13) + return res # res = f() assert res == 2002 + assert seen == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] def test_f_back(self): import sys from _continuation import continulet + stack = self.stack # - def g(c): + def bar(c): + assert stack() == ['bar', 'foo', 'test_f_back'] c.switch(sys._getframe(0)) c.switch(sys._getframe(0).f_back) c.switch(sys._getframe(1)) + # + assert stack() == ['bar', 'foo', 'main', 'test_f_back'] c.switch(sys._getframe(1).f_back) - assert sys._getframe(2) is f3.f_back + # + assert stack() == ['bar', 'foo', 'main2', 'test_f_back'] + assert sys._getframe(2) is f3_foo.f_back c.switch(sys._getframe(2)) - def f(c): - g(c) + def foo(c): + bar(c) # - c = continulet(f) - f1 = c.switch() - assert f1.f_code.co_name == 'g' - f2 = c.switch() - assert f2.f_code.co_name == 'f' - f3 = c.switch() - assert f3 is f2 - assert f1.f_back is f3 + assert stack() == ['test_f_back'] + c = continulet(foo) + f1_bar = c.switch() + assert f1_bar.f_code.co_name == 'bar' + f2_foo = c.switch() + assert f2_foo.f_code.co_name == 'foo' + f3_foo = c.switch() + assert f3_foo is f2_foo + assert f1_bar.f_back is f3_foo + # def 
main(): - f4 = c.switch() - assert f4.f_code.co_name == 'main', repr(f4.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f4_main = c.switch() + assert f4_main.f_code.co_name == 'main' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack() == ['main', 'test_f_back'] + assert stack(f1_bar) == ['bar', 'foo', '...'] + # def main2(): - f5 = c.switch() - assert f5.f_code.co_name == 'main2', repr(f5.f_code.co_name) - assert f3.f_back is f1 # not running, so a loop + f5_main2 = c.switch() + assert f5_main2.f_code.co_name == 'main2' + assert f3_foo.f_back is f1_bar # not running, so a loop + assert stack(f1_bar) == ['bar', 'foo', '...'] + # main() main2() res = c.switch() assert res is None - assert f3.f_back is None + assert f3_foo.f_back is None def test_traceback_is_complete(self): import sys diff --git a/pypy/module/_continuation/test/test_translated.py b/pypy/module/_continuation/test/test_translated.py --- a/pypy/module/_continuation/test/test_translated.py +++ b/pypy/module/_continuation/test/test_translated.py @@ -5,6 +5,7 @@ py.test.skip("to run on top of a translated pypy-c") import sys, random +from rpython.tool.udir import udir # ____________________________________________________________ @@ -92,6 +93,33 @@ from pypy.conftest import option if not option.runappdirect: py.test.skip("meant only for -A run") + cls.w_vmprof_file = cls.space.wrap(str(udir.join('profile.vmprof'))) + + def test_vmprof(self): + """ + The point of this test is to check that we do NOT segfault. In + particular, we need to ensure that vmprof does not sample the stack in + the middle of a switch, else we read nonsense. + """ + try: + import _vmprof + except ImportError: + py.test.skip("no _vmprof") + # + def switch_forever(c): + while True: + c.switch() + # + f = open(self.vmprof_file, 'w+b') + _vmprof.enable(f.fileno(), 1/250.0, False, False, False, False) + c = _continuation.continulet(switch_forever) + for i in range(10**7): + if i % 100000 == 0: + print i + c.switch() + _vmprof.disable() + f.close() + def _setup(): for _i in range(20): diff --git a/pypy/module/_cppyy/__init__.py b/pypy/module/_cppyy/__init__.py --- a/pypy/module/_cppyy/__init__.py +++ b/pypy/module/_cppyy/__init__.py @@ -1,28 +1,27 @@ from pypy.interpreter.mixedmodule import MixedModule class Module(MixedModule): - "This module provides runtime bindings to C++ code for which reflection\n\ - info has been generated. Current supported back-ends are Reflex and CINT.\n\ - See http://doc.pypy.org/en/latest/cppyy.html for full details." + "This module bridges the cppyy frontend with its backend, through PyPy.\n\ + See http://cppyy.readthedocs.io/en/latest for full details."
interpleveldefs = { - '_load_dictionary' : 'interp_cppyy.load_dictionary', '_resolve_name' : 'interp_cppyy.resolve_name', '_scope_byname' : 'interp_cppyy.scope_byname', - '_template_byname' : 'interp_cppyy.template_byname', + '_is_template' : 'interp_cppyy.is_template', '_std_string_name' : 'interp_cppyy.std_string_name', '_set_class_generator' : 'interp_cppyy.set_class_generator', '_set_function_generator': 'interp_cppyy.set_function_generator', '_register_class' : 'interp_cppyy.register_class', '_get_nullptr' : 'interp_cppyy.get_nullptr', - 'CPPInstanceBase' : 'interp_cppyy.W_CPPInstance', + 'CPPClassBase' : 'interp_cppyy.W_CPPClass', 'addressof' : 'interp_cppyy.addressof', + '_bind_object' : 'interp_cppyy._bind_object', 'bind_object' : 'interp_cppyy.bind_object', + 'move' : 'interp_cppyy.move', } appleveldefs = { '_init_pythonify' : 'pythonify._init_pythonify', - 'load_reflection_info' : 'pythonify.load_reflection_info', 'add_pythonization' : 'pythonify.add_pythonization', 'Template' : 'pythonify.CPPTemplate', } diff --git a/pypy/module/_cppyy/backend/create_cppyy_package.py b/pypy/module/_cppyy/backend/create_cppyy_package.py deleted file mode 100755 --- a/pypy/module/_cppyy/backend/create_cppyy_package.py +++ /dev/null @@ -1,649 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function - -import os, sys -import argparse, re, shutil, tarfile, urllib2 - - -DEBUG_TESTBUILD = False - -TARBALL_CACHE_DIR = 'releases' - -ROOT_KEEP = ['build', 'cmake', 'config', 'core', 'etc', 'interpreter', - 'io', 'LICENSE', 'net', 'Makefile', 'CMakeLists.txt', 'math', - 'main'] # main only needed in more recent root b/c of rootcling -ROOT_CORE_KEEP = ['CMakeLists.txt', 'base', 'clib', 'clingutils', 'cont', - 'dictgen', 'foundation', 'lzma', 'macosx', 'meta', - 'metacling', 'metautils', 'rootcling_stage1', 'textinput', - 'thread', 'unix', 'utils', 'winnt', 'zip'] -ROOT_IO_KEEP = ['CMakeLists.txt', 'io', 'rootpcm'] -ROOT_NET_KEEP = ['CMakeLists.txt', 'net'] -ROOT_MATH_KEEP = ['CMakeLists.txt', 'mathcore'] -ROOT_ETC_KEEP = ['Makefile.arch', 'class.rules', 'cmake', 'dictpch', - 'gdb-backtrace.sh', 'gitinfo.txt', 'helgrind-root.supp', - 'hostcert.conf', 'system.plugins-ios', - 'valgrind-root-python.supp', 'valgrind-root.supp', 'vmc'] - -ROOT_EXPLICIT_REMOVE = ['core/base/v7', 'math/mathcore/v7', 'io/io/v7'] - - -ERR_RELEASE_NOT_FOUND = 2 - - -# -## CLI arguments -# -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - - -# -## ROOT source pull and cleansing -# -def clean_directory(directory, keeplist, trim_cmake=True): - removed_entries = [] - for entry in os.listdir(directory): - if entry[0] == '.' 
or entry in keeplist: - continue - removed_entries.append(entry) - entry = os.path.join(directory, entry) - print('now removing', entry) - if os.path.isdir(entry): - shutil.rmtree(entry) - else: - os.remove(entry) - - if not trim_cmake: - return - - # now take the removed entries out of the CMakeLists.txt - if removed_entries: - inp = os.path.join(directory, 'CMakeLists.txt') - print('trimming', inp) - outp = inp+'.new' - new_cml = open(outp, 'w') - for line in open(inp).readlines(): - if ('add_subdirectory' in line) or\ - ('COMMAND' in line and 'copy' in line) or\ - ('ROOT_ADD_TEST_SUBDIRECTORY' in line) or\ - ('install(DIRECTORY' in line): - for sub in removed_entries: - if sub in line: - line = '#'+line - break - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - else: - print('reusing existing %s/CMakeLists.txt' % (directory,)) - - -class ReleaseValidation(argparse.Action): - def __call__(self, parser, namespace, value, option_string=None): - if not re.match(r'6\.\d\d\.\d\d', value): - raise argparse.ArgumentTypeError( - "release number should of the form '6.dd.dd'") - setattr(namespace, self.dest, value) - return value - -parser = argparse.ArgumentParser( - description='Build PyPi package for cppyy containing the minimum of ROOT') -parser.add_argument('-r', '--release', type=str, nargs='?', - action=ReleaseValidation, help='ROOT release to use') - -args = parser.parse_args() - -if not os.path.exists(TARBALL_CACHE_DIR): - os.mkdir(TARBALL_CACHE_DIR) - -if args.release: - # use provided release - fn = 'root_v%s.source.tar.gz' % args.release - addr = 'https://root.cern.ch/download/'+fn - if not os.path.exists(os.path.join(TARBALL_CACHE_DIR, fn)): - try: - print('retrieving', fn) - resp = urllib2.urlopen(addr, fn) - out = open(os.path.join(TARBALL_CACHE_DIR, fn), 'wb') - out.write(resp.read()) - out.close() - except urllib2.HTTPError: - print('release %s not found' % args.release) - sys.exit(ERR_RELEASE_NOT_FOUND) - else: - print('reusing', fn, 'from local directory') -else: - print('provide release ... getting latest release is not yet implemented ...') - sys.exit(1) - # get latest and set fn, args.release, etc. 
- -# construct version for package -args.version = '' -testnext = False -for c in args.release: - if testnext: - testnext = False - if c == '0': - continue - if c == '.': - testnext = True - args.version += c -args.version += '.0' - -fn = os.path.join(TARBALL_CACHE_DIR, fn) -pkgdir = os.path.join('root-'+args.release) -if not os.path.exists(pkgdir): - print('now extracting', args.release) - tf = tarfile.TarFile.gzopen(fn) - tf.extractall() - tf.close() -else: - print('reusing existing directory', pkgdir) - -# remove everything except for the listed set of libraries -os.chdir(pkgdir) - -clean_directory(os.path.curdir, ROOT_KEEP) -clean_directory('core', ROOT_CORE_KEEP) -clean_directory('etc', ROOT_ETC_KEEP, trim_cmake=False) -clean_directory('io', ROOT_IO_KEEP) -clean_directory('math', ROOT_MATH_KEEP) -clean_directory('net', ROOT_NET_KEEP) - - -# trim main (only need rootcling) -print('trimming main') -for entry in os.listdir('main/src'): - if entry != 'rootcling.cxx': - os.remove('main/src/'+entry) -inp = 'main/CMakeLists.txt' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if ('ROOT_EXECUTABLE' in line or\ - 'SET_TARGET_PROPERTIES' in line) and\ - not 'rootcling' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove afterimage and ftgl explicitly -print('trimming externals') -for cmf in ['AfterImage', 'FTGL']: - os.remove('cmake/modules/Find%s.cmake' % (cmf,)) -inp = 'cmake/modules/SearchInstalledSoftware.cmake' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Check for ftgl if needed' == line[0:28] or\ - '#---Check for AfterImage' == line[0:24]: - now_stripping = True - elif '#---Check' == line[0:9]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'cmake/modules/RootBuildOptions.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if 'ROOT_BUILD_OPTION(builtin_ftgl' in line or\ - 'ROOT_BUILD_OPTION(builtin_afterimage' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - - -# remove testing and examples -print('trimming testing') -inp = 'CMakeLists.txt' -outp = inp+'.new' -now_stripping = False -new_cml = open(outp, 'w') -for line in open(inp).readlines(): - if '#---Configure Testing using CTest' == line[0:33] or\ - '#---hsimple.root' == line[0:16]: - now_stripping = True - elif '#---Packaging' == line[0:13] or\ - '#---version' == line[0:11]: - now_stripping = False - if now_stripping: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -print('trimming RootCPack') -inp = 'cmake/modules/RootCPack.cmake' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if 'README.txt' in line: - line = '#'+line - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -# some more explicit removes: -for dir_to_remove in ROOT_EXPLICIT_REMOVE: - try: - shutil.rmtree(dir_to_remove) - except OSError: - pass - -# special fixes -inp = 'core/base/src/TVirtualPad.cxx' -outp = inp+'.new' -new_cml = open(outp, 'w') -for line in open(inp): - if '#include "X3DBuffer.h"' == line[0:22]: - line = """//#include "X3DBuffer.h" -typedef struct _x3d_sizeof_ { - int numPoints; - int numSegs; - int numPolys; -} Size3D; -""" - new_cml.write(line) -new_cml.close() -os.rename(outp, inp) - -inp = 'math/mathcore/src/Fitter.cxx' -if os.path.exists(inp): - outp = inp+'.new' - 
new_cml = open(outp, 'w') - for line in open(inp): - if '#include "TF1.h"' in line: - continue - new_cml.write(line) - new_cml.close() - os.rename(outp, inp) - -# done -os.chdir(os.path.pardir) - -# debugging: run a test build -if DEBUG_TESTBUILD: - print('running a debug test build') - tb = "test_builddir" - if os.path.exists(tb): - shutil.rmtree(tb) - os.mkdir(tb) - os.chdir(tb) - os.system('cmake ../%s -DCMAKE_INSTALL_PREFIX=../install -Dminimal=ON -Dasimage=OFF' % pkgdir) - os.system('make -j 32') - - -# -## package creation -# -countdown = 0 -pidir = 'Package-'+args.release -print('creating package', pidir) -if not os.path.exists(pidir): - os.mkdir(pidir) -os.chdir(pidir); countdown += 1 - -print('creating LICENSE.txt') -with open('LICENSE.txt', 'w') as outp: - outp.write("""There are three main parts: - - LLVM: distributed under University of Illinois/NCSA Open Source License - https://opensource.org/licenses/UoI-NCSA.php - ROOT: distributed under LGPL 2.1 - https://root.cern.ch/license - Cppyy: distributed under LBNL BSD - https://fedoraproject.org/wiki/Licensing/LBNLBSD -""") - -print('creating MANIFEST.in') -with open('MANIFEST.in', 'w') as outp: - outp.write("""# Include the license file -include LICENSE.txt - -# Include the data files -recursive-include src * -""") - -print('creating README.rst') -with open('README.rst', 'w') as outp: - outp.write("""PyPy cling-support -================== - ----- - -Find the documentation here: - http://doc.pypy.org/en/latest/cppyy.html -""") - -print('creating setup.cfg') -with open('setup.cfg', 'w') as outp: - outp.write("""[bdist_wheel] -universal=0 -""") - -print('creating setup.py') -with open('setup.py', 'w') as outp: - outp.write("""import os, sys, subprocess -from setuptools import setup, find_packages -from distutils import log -from distutils.command.build import build as _build -from setuptools.command.install import install as _install -from distutils.sysconfig import get_python_lib -from distutils.errors import DistutilsSetupError -from codecs import open - -here = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - -builddir = None -def get_builddir(): - global builddir - if builddir is None: - topdir = os.getcwd() - builddir = os.path.join(topdir, 'builddir') - return builddir - -srcdir = None -def get_srcdir(): - global srcdir - if srcdir is None: - topdir = os.getcwd() - srcdir = os.path.join(topdir, 'src', 'backend') - return srcdir - -class my_cmake_build(_build): - def __init__(self, dist, *args, **kwargs): - _build.__init__(self, dist, *args, **kwargs) - # TODO: can't seem to find a better way of getting hold of - # the install_lib parameter during the build phase ... - prefix = '' - try: - prefix = dist.get_command_obj('install').install_lib - except AttributeError: - pass - if not prefix: - prefix = get_python_lib(1, 0) - self.prefix = os.path.join(prefix, 'cppyy_backend') - - def run(self): - # base run - _build.run(self) - - # custom run - log.info('Now building libcppyy_backend.so and dependencies') - builddir = get_builddir() - srcdir = get_srcdir() - if not os.path.exists(builddir): - log.info('Creating build directory %s ...' 
% builddir) - os.makedirs(builddir) - - os.chdir(builddir) - log.info('Running cmake for cppyy_backend') - if subprocess.call([ - 'cmake', srcdir, '-Dminimal=ON -Dasimage=OFF', - '-DCMAKE_INSTALL_PREFIX='+self.prefix]) != 0: - raise DistutilsSetupError('Failed to configure cppyy_backend') - - nprocs = os.getenv("MAKE_NPROCS") - if nprocs: - try: - ival = int(nprocs) - nprocs = '-j'+nprocs - except ValueError: - log.warn("Integer expected for MAKE_NPROCS, but got %s (ignored)", nprocs) - nprocs = '-j1' - else: - nprocs = '-j1' - log.info('Now building cppyy_backend and dependencies ...') - if subprocess.call(['make', nprocs]) != 0: - raise DistutilsSetupError('Failed to build cppyy_backend') - - log.info('build finished') - -class my_libs_install(_install): - def run(self): - # base install - _install.run(self) - - # custom install - log.info('Now installing libcppyy_backend.so and dependencies') - builddir = get_builddir() - if not os.path.exists(builddir): - raise DistutilsSetupError('Failed to find build dir!') - os.chdir(builddir) - - prefix = self.install_lib - log.info('Now installing in %s ...', prefix) - if subprocess.call(['make', 'install']) != 0: - raise DistutilsSetupError('Failed to install cppyy_backend') - - log.info('install finished') - - def get_outputs(self): - outputs = _install.get_outputs(self) - outputs.append(os.path.join(self.install_lib, 'cppyy_backend')) - return outputs - -setup( - name='PyPy-cppyy-backend', -""") - outp.write(" version='%s', # corresponds to ROOT %s, extra number is for packager\n"\ - % (args.version, args.release)) - outp.write(""" description='Cling support for PyPy', - long_description=long_description, - - url='http://pypy.org', - - # Author details - author='PyPy Developers', - author_email='pypy-dev at python.org', - - license='LLVM: UoI-NCSA; ROOT: LGPL 2.1; Cppyy: LBNL BSD', - - classifiers=[ - 'Development Status :: 4 - Beta', - - 'Intended Audience :: Developers', - - 'Topic :: Software Development', - 'Topic :: Software Development :: Interpreters', - - #'License :: OSI Approved :: MIT License', - - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Programming Language :: C', - 'Programming Language :: C++', - - 'Natural Language :: English' - ], - - keywords='interpreter development', - - packages=find_packages('src', ['backend']), - include_package_data=True, - - extras_require={ - }, - - cmdclass = { - 'build': my_cmake_build, - 'install': my_libs_install, - }, -) -""") - - -print('creating src ... ROOT part') -if not os.path.exists('src'): - os.mkdir('src') -os.chdir('src'); countdown += 1 -if not os.path.exists('backend'): - src = os.path.join(os.path.pardir, os.path.pardir, pkgdir) - print('now copying', src) - shutil.copytree(src, 'backend') - -print('creating src ... 
cppyy part') -os.chdir('backend'); countdown += 1 -if not os.path.exists('cppyy'): - os.mkdir('cppyy') - os.chdir('cppyy'); countdown += 1 - - with open('CMakeLists.txt', 'w') as outp: - outp.write("""############################################################################ -# CMakeLists.txt file for building cppyy package -############################################################################ - From pypy.commits at gmail.com Thu Nov 30 14:41:25 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 11:41:25 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Module names inside a zip are not fsencoded - they can be any str Message-ID: <5a205ee5.31a9df0a.42891.7e34@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93226:f1a556ffff93 Date: 2017-11-30 19:39 +0000 http://bitbucket.org/pypy/pypy/changeset/f1a556ffff93/ Log: Module names inside a zip are not fsencoded - they can be any str diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -47,7 +47,7 @@ # THIS IS A TERRIBLE HACK TO BE CPYTHON COMPATIBLE def getitem(self, space, w_name): - return self._getitem(space, space.fsencode_w(w_name)) + return self._getitem(space, space.text_w(w_name)) def _getitem(self, space, name): try: @@ -90,14 +90,14 @@ def iteritems(self, space): return space.iter(self.items(space)) - @unwrap_spec(name='fsencode') + @unwrap_spec(name='text') def contains(self, space, name): return space.newbool(name in self.cache) def clear(self, space): self.cache = {} - @unwrap_spec(name='fsencode') + @unwrap_spec(name='text') def delitem(self, space, name): del self.cache[name] @@ -221,7 +221,7 @@ except KeyError: return False - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def find_module(self, space, fullname, w_path=None): filename = self.make_filename(fullname) for _, _, ext in ENUMERATE_EXTS: @@ -247,7 +247,7 @@ return self.filename + os.path.sep + filename def load_module(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, is_package, ext in ENUMERATE_EXTS: fname = filename + ext @@ -287,7 +287,7 @@ raise raise oefmt(get_error(space), "can't find module %R", w_fullname) - @unwrap_spec(filename='fsencode') + @unwrap_spec(filename='text') def get_data(self, space, filename): filename = self._find_relative_path(filename) try: @@ -301,7 +301,7 @@ raise zlib_error(space, e.msg) def get_code(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for compiled, _, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -325,7 +325,7 @@ "Cannot find source or code for %R in %R", w_fullname, space.newfilename(self.name)) - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def get_source(self, space, fullname): filename = self.make_filename(fullname) found = False @@ -348,7 +348,7 @@ space.newfilename(self.name)) def get_filename(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -360,7 +360,7 @@ space.newfilename(self.name)) def is_package(self, space, w_fullname): - fullname = space.fsencode_w(w_fullname) + fullname = 
space.text_w(w_fullname) filename = self.make_filename(fullname) for _, is_package, ext in ENUMERATE_EXTS: if self.have_modulefile(space, filename + ext): @@ -385,7 +385,7 @@ return True, self.filename + os.path.sep + self.corr_zname(dirpath) return False, None - @unwrap_spec(fullname='fsencode') + @unwrap_spec(fullname='text') def find_loader(self, space, fullname, w_path=None): found, ns_portion = self._find_loader(space, fullname) if not found: @@ -401,9 +401,9 @@ name = space.fsencode_w(w_name) ok = False parts_ends = [i for i in range(0, len(name)) - if name[i] == os.path.sep or name[i] == ZIPSEP] + if name[i] == os.path.sep or name[i] == ZIPSEP] parts_ends.append(len(name)) - filename = "" # make annotator happy + filename = "" # make annotator happy for i in parts_ends: filename = name[:i] if not filename: From pypy.commits at gmail.com Thu Nov 30 15:43:39 2017 From: pypy.commits at gmail.com (arigo) Date: Thu, 30 Nov 2017 12:43:39 -0800 (PST) Subject: [pypy-commit] pypy unicode-utf8: I think this is a speed-up Message-ID: <5a206d7b.e1acdf0a.492d7.9357@mx.google.com> Author: Armin Rigo Branch: unicode-utf8 Changeset: r93227:91d2d71881e2 Date: 2017-11-30 21:43 +0100 http://bitbucket.org/pypy/pypy/changeset/91d2d71881e2/ Log: I think this is a speed-up diff --git a/rpython/rlib/rutf8.py b/rpython/rlib/rutf8.py --- a/rpython/rlib/rutf8.py +++ b/rpython/rlib/rutf8.py @@ -86,8 +86,8 @@ """Gives the position of the next codepoint after pos. Assumes valid utf8. 'pos' must be before the end of the string. """ + assert pos >= 0 chr1 = ord(code[pos]) - assert pos >= 0 if chr1 <= 0x7F: return pos + 1 if chr1 <= 0xDF: From pypy.commits at gmail.com Thu Nov 30 21:08:39 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:08:39 -0800 (PST) Subject: [pypy-commit] pypy py3.5: .pyo suffix is meaningless now (PEP 488) Message-ID: <5a20b9a7.c9061c0a.97b5f.914f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93228:f8e7ad765a37 Date: 2017-12-01 02:06 +0000 http://bitbucket.org/pypy/pypy/changeset/f8e7ad765a37/ Log: .pyo suffix is meaningless now (PEP 488) diff --git a/pypy/interpreter/app_main.py b/pypy/interpreter/app_main.py --- a/pypy/interpreter/app_main.py +++ b/pypy/interpreter/app_main.py @@ -729,7 +729,7 @@ SourceFileLoader, SourcelessFileLoader) if IS_WINDOWS: filename = filename.lower() - if filename.endswith('.pyc') or filename.endswith('.pyo'): + if filename.endswith('.pyc'): # We don't actually load via SourcelessFileLoader # because '__main__' must not be listed inside # 'importlib._bootstrap._module_locks' (it deadlocks diff --git a/pypy/interpreter/mixedmodule.py b/pypy/interpreter/mixedmodule.py --- a/pypy/interpreter/mixedmodule.py +++ b/pypy/interpreter/mixedmodule.py @@ -254,7 +254,7 @@ assert typ == imp.PY_SOURCE source = file.read() file.close() - if fn.endswith('.pyc') or fn.endswith('.pyo'): + if fn.endswith('.pyc'): fn = fn[:-1] app = gateway.applevel(source, filename=fn, modname=appname) applevelcache[impbase] = app diff --git a/pypy/module/imp/interp_imp.py b/pypy/module/imp/interp_imp.py --- a/pypy/module/imp/interp_imp.py +++ b/pypy/module/imp/interp_imp.py @@ -27,7 +27,7 @@ def get_tag(space): """get_tag() -> string - Return the magic tag for .pyc or .pyo files.""" + Return the magic tag for .pyc files.""" return space.newtext(importing.PYC_TAG) def get_file(space, w_file, filename, filemode): diff --git a/pypy/module/imp/test/test_app.py b/pypy/module/imp/test/test_app.py --- a/pypy/module/imp/test/test_app.py +++ 
b/pypy/module/imp/test/test_app.py @@ -85,7 +85,7 @@ assert suffix == '.py' assert mode == 'r' elif type == imp.PY_COMPILED: - assert suffix in ('.pyc', '.pyo') + assert suffix == '.pyc' assert mode == 'rb' elif type == imp.C_EXTENSION: assert suffix.endswith(('.pyd', '.so')) diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -18,10 +18,8 @@ ENUMERATE_EXTS = unrolling_iterable( [(True, True, ZIPSEP + '__init__.pyc'), - (True, True, ZIPSEP + '__init__.pyo'), (False, True, ZIPSEP + '__init__.py'), (True, False, '.pyc'), - (True, False, '.pyo'), (False, False, '.py')]) class Cache: diff --git a/pypy/sandbox/pypy_interact.py b/pypy/sandbox/pypy_interact.py --- a/pypy/sandbox/pypy_interact.py +++ b/pypy/sandbox/pypy_interact.py @@ -46,7 +46,7 @@ # * can access its own executable # * can access the pure Python libraries # * can access the temporary usession directory as /tmp - exclude = ['.pyc', '.pyo'] + exclude = ['.pyc'] if self.tmpdir is None: tmpdirnode = Dir({}) else: @@ -57,7 +57,7 @@ 'bin': Dir({ 'pypy3-c': RealFile(self.executable, mode=0111), 'lib-python': RealDir(os.path.join(libroot, 'lib-python'), - exclude=exclude), + exclude=exclude), 'lib_pypy': RealDir(os.path.join(libroot, 'lib_pypy'), exclude=exclude), }), @@ -66,7 +66,7 @@ def main(): from getopt import getopt # and not gnu_getopt! - options, arguments = getopt(sys.argv[1:], 't:hv', + options, arguments = getopt(sys.argv[1:], 't:hv', ['tmp=', 'heapsize=', 'timeout=', 'log=', 'verbose', 'help']) tmpdir = None From pypy.commits at gmail.com Thu Nov 30 21:27:49 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:27:49 -0800 (PST) Subject: [pypy-commit] pypy py3.5: Module names inside a zip are not fsencoded, part 2 Message-ID: <5a20be25.02be1c0a.c0baf.304f@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93229:8309e6092c02 Date: 2017-12-01 02:25 +0000 http://bitbucket.org/pypy/pypy/changeset/8309e6092c02/ Log: Module names inside a zip are not fsencoded, part 2 diff --git a/pypy/module/zipimport/interp_zipimport.py b/pypy/module/zipimport/interp_zipimport.py --- a/pypy/module/zipimport/interp_zipimport.py +++ b/pypy/module/zipimport/interp_zipimport.py @@ -1,3 +1,5 @@ +import os +import stat from pypy.interpreter.baseobjspace import W_Root from pypy.interpreter.error import OperationError, oefmt @@ -9,8 +11,6 @@ from rpython.rlib.unroll import unrolling_iterable from rpython.rlib.rzipfile import RZipFile, BadZipfile from rpython.rlib.rzlib import RZlibError -import os -import stat ZIPSEP = '/' # note that zipfiles always use slash, but for OSes with other @@ -145,7 +145,7 @@ return fname def import_py_file(self, space, modname, filename, buf, pkgpath): - w_mod = Module(space, space.newfilename(modname)) + w_mod = Module(space, space.newtext(modname)) real_name = self.filename + os.path.sep + self.corr_zname(filename) space.setattr(w_mod, space.newtext('__loader__'), self) importing._prepare_module(space, w_mod, real_name, pkgpath) From pypy.commits at gmail.com Thu Nov 30 21:27:51 2017 From: pypy.commits at gmail.com (rlamy) Date: Thu, 30 Nov 2017 18:27:51 -0800 (PST) Subject: [pypy-commit] pypy py3.5: hg merge default Message-ID: <5a20be27.11c6df0a.4c8c1.437c@mx.google.com> Author: Ronan Lamy Branch: py3.5 Changeset: r93230:c932756506d4 Date: 2017-12-01 02:26 +0000 http://bitbucket.org/pypy/pypy/changeset/c932756506d4/ Log: hg merge default diff --git 
a/rpython/jit/metainterp/optimizeopt/intutils.py b/rpython/jit/metainterp/optimizeopt/intutils.py --- a/rpython/jit/metainterp/optimizeopt/intutils.py +++ b/rpython/jit/metainterp/optimizeopt/intutils.py @@ -273,7 +273,8 @@ return r def contains(self, val): - assert not isinstance(val, long) + if not we_are_translated(): + assert not isinstance(val, long) if not isinstance(val, int): if ((not self.has_lower or self.lower == MININT) and not self.has_upper or self.upper == MAXINT):
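
A minimal sketch of the we_are_translated() idiom used in the hunk above, assuming a Python 2 environment with the rpython package importable; the helper name and the bounds logic below are made up for illustration and are not taken from intutils.py. we_are_translated(), from rpython.rlib.objectmodel, returns False while the code runs untranslated on the CPython 2 host and is constant-folded to True by the translator, so host-only sanity checks such as the isinstance(val, long) assert are dropped from the translated interpreter.

    from rpython.rlib.objectmodel import we_are_translated

    def bounded_contains(lower, upper, val):
        # Hypothetical helper mirroring the pattern in IntBound.contains():
        # the isinstance(val, long) check only makes sense on the untranslated
        # CPython 2 host, where 'long' exists as a separate type; after
        # translation we_are_translated() folds to True and the branch is
        # removed entirely.
        if not we_are_translated():
            assert not isinstance(val, long)
        return lower <= val <= upper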